From 9a4ef863699db6af3e97cf1dad2f713fea182609 Mon Sep 17 00:00:00 2001 From: bluew Date: Fri, 19 Oct 2018 19:04:27 +0200 Subject: [PATCH 01/23] Apply tracer patch from ultrasoc Specify --ust-trace FILE to record execution trace in CSV format to FILE. --- riscv/clint.cc | 1 + riscv/execute.cc | 4 +-- riscv/extension.h | 1 + riscv/processor.cc | 42 +++++++++++++++++++++++--- riscv/processor.h | 3 ++ riscv/riscv.mk.in | 2 ++ riscv/sim.cc | 19 +++++++++++- riscv/sim.h | 3 ++ riscv/ust_tracer.cc | 72 +++++++++++++++++++++++++++++++++++++++++++++ riscv/ust_tracer.h | 19 ++++++++++++ spike_main/spike.cc | 4 +++ 11 files changed, 163 insertions(+), 7 deletions(-) create mode 100644 riscv/ust_tracer.cc create mode 100644 riscv/ust_tracer.h diff --git a/riscv/clint.cc b/riscv/clint.cc index 08508b43fa..7fd4f0cc1b 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -4,6 +4,7 @@ clint_t::clint_t(std::vector& procs) : procs(procs), mtimecmp(procs.size()) { + mtime = 0; } /* 0000 msip hart 0 diff --git a/riscv/execute.cc b/riscv/execute.cc index e639e90462..b56db9f27e 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -85,7 +85,7 @@ static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch) bool processor_t::slow_path() { - return debug || state.single_step != state.STEP_NONE || state.dcsr.cause; + return trace || debug || state.single_step != state.STEP_NONE || state.dcsr.cause; } // fetch/decode/execute loop @@ -142,7 +142,7 @@ void processor_t::step(size_t n) } insn_fetch_t fetch = mmu->load_insn(pc); - if (debug && !state.serialized) + if ((trace || debug) && !state.serialized) disasm(fetch.insn); pc = execute_insn(this, pc, fetch); diff --git a/riscv/extension.h b/riscv/extension.h index d1e847d9b8..419b172f29 100644 --- a/riscv/extension.h +++ b/riscv/extension.h @@ -16,6 +16,7 @@ class extension_t virtual const char* name() = 0; virtual void reset() {}; virtual void set_debug(bool value) {}; + virtual void set_trace(bool value) {}; virtual 
~extension_t(); void set_processor(processor_t* _p) { p = _p; } diff --git a/riscv/processor.cc b/riscv/processor.cc index 00612f0944..58bca7efb2 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -5,6 +5,7 @@ #include "common.h" #include "config.h" #include "simif.h" +#include "ust_tracer.h" #include "mmu.h" #include "disasm.h" #include @@ -21,7 +22,7 @@ processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, bool halt_on_reset) - : debug(false), halt_request(false), sim(sim), ext(NULL), id(id), + : debug(false), trace(false), halt_request(false), sim(sim), ext(NULL), id(id), halt_on_reset(halt_on_reset), last_pc(1), executions(1) { parse_isa_string(isa); @@ -34,6 +35,8 @@ processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, for (auto disasm_insn : ext->get_disasms()) disassembler->add_insn(disasm_insn); + xlen = 0; + reset(); } @@ -125,6 +128,8 @@ void state_t::reset(reg_t max_isa) misa = max_isa; prv = PRV_M; pc = DEFAULT_RSTVEC; + mstatus = 0; + dcsr.cause = 0; tselect = 0; for (unsigned int i = 0; i < num_triggers; i++) mcontrol[i].type = 2; @@ -140,6 +145,13 @@ void processor_t::set_debug(bool value) ext->set_debug(value); } +void processor_t::set_trace(bool value) +{ + trace = value; + if (ext) + ext->set_trace(value); +} + void processor_t::set_histogram(bool value) { histogram_enabled = value; @@ -254,6 +266,13 @@ void processor_t::take_trap(trap_t& t, reg_t epc) t.get_tval()); } + if (trace) { + ust_set_exception(t.cause()); + if (t.has_tval()) + ust_set_tval(t.get_tval()); + ust_set_interrupt(0); + } + if (state.dcsr.cause) { if (t.cause() == CAUSE_BREAKPOINT) { state.pc = DEBUG_ROM_ENTRY; @@ -275,6 +294,11 @@ void processor_t::take_trap(trap_t& t, reg_t epc) reg_t bit = t.cause(); reg_t deleg = state.medeleg; bool interrupt = (bit & ((reg_t)1 << (max_xlen-1))) != 0; + + if (trace) { + ust_set_interrupt(interrupt); + } + if (interrupt) deleg = state.mideleg, bit &= ~((reg_t)1 << (max_xlen-1)); if (state.prv <= PRV_S 
&& bit < max_xlen && ((deleg >> bit) & 1)) { @@ -310,12 +334,22 @@ void processor_t::disasm(insn_t insn) { uint64_t bits = insn.bits() & ((1ULL << (8 * insn_length(insn.bits()))) - 1); if (last_pc != state.pc || last_bits != bits) { - if (executions != 1) { + if (debug && executions != 1) { fprintf(stderr, "core %3d: Executed %" PRIx64 " times\n", id, executions); } - fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", - id, state.pc, bits, disassembler->disassemble(insn).c_str()); + if (debug) { + fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", + id, state.pc, bits, disassembler->disassemble(insn).c_str()); + } + + if (trace) { + ust_step(); + ust_set_addr(state.pc); + ust_set_insn(bits); + ust_set_priv(state.prv); + } + last_pc = state.pc; last_bits = bits; executions = 1; diff --git a/riscv/processor.h b/riscv/processor.h index de0be7829b..754d3e65f1 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -170,6 +170,7 @@ class processor_t : public abstract_device_t ~processor_t(); void set_debug(bool value); + void set_trace(bool value); void set_histogram(bool value); void reset(); void step(size_t n); // run for n cycles @@ -211,6 +212,8 @@ class processor_t : public abstract_device_t // When true, display disassembly of each instruction that's executed. bool debug; + // When true, write the UST trace + bool trace; // When true, take the slow simulation path. bool slow_path(); bool halted() { return state.dcsr.cause ? 
true : false; } diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 80755e711c..dc23eb893b 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -29,6 +29,7 @@ riscv_hdrs = \ debug_rom_defines.h \ remote_bitbang.h \ jtag_dtm.h \ + ust_tracer.h \ riscv_precompiled_hdrs = \ insn_template.h \ @@ -53,6 +54,7 @@ riscv_srcs = \ debug_module.cc \ remote_bitbang.cc \ jtag_dtm.cc \ + ust_tracer.cc \ $(riscv_gen_srcs) \ riscv_test_srcs = diff --git a/riscv/sim.cc b/riscv/sim.cc index 44223a7d90..6725e93536 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -1,6 +1,7 @@ // See LICENSE for license details. #include "sim.h" +#include "ust_tracer.h" #include "mmu.h" #include "dts.h" #include "remote_bitbang.h" @@ -30,7 +31,7 @@ sim_t::sim_t(const char* isa, size_t nprocs, bool halted, reg_t start_pc, std::vector const hartids, unsigned progsize, unsigned max_bus_master_bits, bool require_authentication) : htif_t(args), mems(mems), procs(std::max(nprocs, size_t(1))), - start_pc(start_pc), current_step(0), current_proc(0), debug(false), + start_pc(start_pc), current_step(0), current_proc(0), debug(false), trace(false), histogram_enabled(false), dtb_enabled(true), remote_bitbang(NULL), debug_module(this, progsize, max_bus_master_bits, require_authentication) { @@ -64,6 +65,7 @@ sim_t::sim_t(const char* isa, size_t nprocs, bool halted, reg_t start_pc, sim_t::~sim_t() { + ust_close(); for (size_t i = 0; i < procs.size(); i++) delete procs[i]; delete debug_mmu; @@ -79,6 +81,9 @@ void sim_t::main() if (!debug && log) set_procs_debug(true); + if (!debug && trace) + set_procs_trace(true); + while (!done()) { if (debug || ctrlc_pressed) @@ -130,6 +135,12 @@ void sim_t::set_log(bool value) log = value; } +void sim_t::set_ust_trace(const char * const ust_file) +{ + ust_open(ust_file); + trace = true; +} + void sim_t::set_histogram(bool value) { histogram_enabled = value; @@ -144,6 +155,12 @@ void sim_t::set_procs_debug(bool value) procs[i]->set_debug(value); } +void 
sim_t::set_procs_trace(bool value) +{ + for (size_t i=0; i< procs.size(); i++) + procs[i]->set_trace(value); +} + bool sim_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { if (addr + len < addr) diff --git a/riscv/sim.h b/riscv/sim.h index e42808b4ae..90fbc97b8d 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -30,8 +30,10 @@ class sim_t : public htif_t, public simif_t int run(); void set_debug(bool value); void set_log(bool value); + void set_ust_trace(const char * const ust_file); void set_histogram(bool value); void set_procs_debug(bool value); + void set_procs_trace(bool value); void set_dtb_enabled(bool value) { this->dtb_enabled = value; } @@ -64,6 +66,7 @@ class sim_t : public htif_t, public simif_t size_t current_proc; bool debug; bool log; + bool trace; bool histogram_enabled; // provide a histogram of PCs bool dtb_enabled; remote_bitbang_t* remote_bitbang; diff --git a/riscv/ust_tracer.cc b/riscv/ust_tracer.cc new file mode 100644 index 0000000000..cf8bcd673c --- /dev/null +++ b/riscv/ust_tracer.cc @@ -0,0 +1,72 @@ +// See LICENSE for license details. 
+ +#include "ust_tracer.h" +#include "trap.h" + +#include + +static reg_t s_addr = 0; +static uint64_t s_insn = 0; +static uint8_t s_prv = 0; +static uint8_t s_ex = 0; +static reg_t s_ex_cause = 0; +static reg_t s_tval = 0; +static uint8_t s_interrupt = 0; +static FILE* s_trace = NULL; +static bool has_output_header = false; + +void ust_open(const char *filename) +{ + s_trace = fopen(filename, "w"); + if (!s_trace) + fprintf(stderr, "Failed to open ust trace file %s: %s", + filename, strerror(errno)); +} + +void ust_step(void) +{ + if (!s_trace) + return; + + if (has_output_header) { + fprintf(s_trace, "1,%lx,%lx,%x,%x,%lx,%lx,%x\n", + s_addr, s_insn, s_prv, s_ex, s_ex_cause, s_tval, s_interrupt); + } else { + fprintf(s_trace, "VALID,ADDRESS,INSN,PRIVILEGE,EXCEPTION,ECAUSE,TVAL,INTERRUPT\n"); + has_output_header = true; + } + + s_ex = 0; +} + +void ust_close(void) +{ + if (s_trace) + fclose(s_trace); + s_trace = NULL; +} + +void ust_set_addr(reg_t addr) { + s_addr = addr; +} + +void ust_set_insn(uint64_t insn) { + s_insn = insn; +} + +void ust_set_priv(uint8_t prv) { + s_prv = prv; +} + +void ust_set_exception(reg_t cause) { + s_ex = 1; + s_ex_cause = cause; +} + +void ust_set_tval(reg_t tval) { + s_tval = tval; +} + +void ust_set_interrupt(uint8_t interrupt) { + s_interrupt = interrupt; +} diff --git a/riscv/ust_tracer.h b/riscv/ust_tracer.h new file mode 100644 index 0000000000..7eb6fc479a --- /dev/null +++ b/riscv/ust_tracer.h @@ -0,0 +1,19 @@ +// See LICENSE for license details. 
+ +#ifndef _RISCV_UST_TRACER_H +#define _RISCV_UST_TRACER_H + +#include "processor.h" + +void ust_open(const char *filename); +void ust_start(void); +void ust_step(void); +void ust_close(void); +void ust_set_addr(reg_t addr); +void ust_set_insn(uint64_t insn); +void ust_set_priv(uint8_t prv); +void ust_set_exception(reg_t cause); +void ust_set_tval(reg_t tval); +void ust_set_interrupt(uint8_t interrupt); + +#endif diff --git a/spike_main/spike.cc b/spike_main/spike.cc index 3e5c7e6c60..d95ce469c5 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -37,6 +37,7 @@ static void help() fprintf(stderr, " --extlib= Shared library to load\n"); fprintf(stderr, " --rbb-port= Listen on for remote bitbang connection\n"); fprintf(stderr, " --dump-dts Print device tree string and exit\n"); + fprintf(stderr, " --ust-trace= Write UST trace file\n"); fprintf(stderr, " --disable-dtb Don't write the device tree blob into memory\n"); fprintf(stderr, " --progsize= Progsize for the debug module [default 2]\n"); fprintf(stderr, " --debug-sba= Debug bus master supports up to " @@ -93,6 +94,7 @@ int main(int argc, char** argv) bool log_cache = false; std::function extension; const char* isa = DEFAULT_ISA; + const char* ust_file = NULL; uint16_t rbb_port = 0; bool use_rbb = false; unsigned progsize = 2; @@ -132,6 +134,7 @@ int main(int argc, char** argv) parser.option(0, "isa", 1, [&](const char* s){isa = s;}); parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); parser.option(0, "dump-dts", 0, [&](const char *s){dump_dts = true;}); + parser.option(0, "ust-trace", 1, [&](const char *s){ust_file = s;}); parser.option(0, "disable-dtb", 0, [&](const char *s){dtb_enabled = false;}); parser.option(0, "extlib", 1, [&](const char *s){ void *lib = dlopen(s, RTLD_NOW | RTLD_GLOBAL); @@ -182,6 +185,7 @@ int main(int argc, char** argv) s.set_debug(debug); s.set_log(log); + s.set_ust_trace(ust_file); s.set_histogram(histogram); return s.run(); } From 
9a0f6218302700925e11c9fd17ac64ffe3dfe0f5 Mon Sep 17 00:00:00 2001 From: bluew Date: Fri, 19 Oct 2018 19:12:19 +0200 Subject: [PATCH 02/23] Fix ust_open when passing NULL This prevents an error message ("Failed to open ust trace file:") when --ust-trace is not specified. --- riscv/ust_tracer.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/riscv/ust_tracer.cc b/riscv/ust_tracer.cc index cf8bcd673c..fb68d02dd6 100644 --- a/riscv/ust_tracer.cc +++ b/riscv/ust_tracer.cc @@ -17,6 +17,8 @@ static bool has_output_header = false; void ust_open(const char *filename) { + if(!filename) + return; s_trace = fopen(filename, "w"); if (!s_trace) fprintf(stderr, "Failed to open ust trace file %s: %s", From d3b759fbf3f278d571b679186a357629ca5f3e56 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Thu, 21 Jul 2022 19:45:13 +0200 Subject: [PATCH 03/23] Overwrite with newer Spike from mempool The mempool version of Spike is a lot more up-to-date and implements many new instructions. Overwrite all with contents from toolchain/riscv-isa-sim pulp-platform/mempool@32eab8e823d100978b29325aba9910c56f7146f3 --- ChangeLog.md | 21 + Makefile.in | 116 +- README.md | 76 +- VERSION | 1 + aclocal.m4 | 43 - ax_append_flag.m4 | 50 + ax_append_link_flags.m4 | 44 + ax_check_link_flag.m4 | 53 + ax_require_defined.m4 | 37 + ci-tests/test-spike | 11 + config.h.in | 22 +- configure | 761 ++++-- configure.ac | 27 +- customext/cflush.cc | 41 + .../dummy_rocc.ac => customext/customext.ac | 0 customext/customext.mk.in | 12 + {dummy_rocc => customext}/dummy_rocc.cc | 0 {dummy_rocc => customext}/dummy_rocc_test.c | 0 debug_rom/debug_rom.S | 11 +- debug_rom/debug_rom.h | 13 +- disasm/disasm.ac | 0 disasm/disasm.cc | 1553 ++++++++++++ disasm/disasm.mk.in | 7 + {riscv => disasm}/regnames.cc | 7 + dummy_rocc/dummy_rocc.mk.in | 7 - fdt/fdt.ac | 0 fdt/fdt.c | 291 +++ fdt/fdt.h | 66 + fdt/fdt.mk.in | 17 + fdt/fdt_addresses.c | 101 + fdt/fdt_empty_tree.c | 38 + fdt/fdt_overlay.c | 881 +++++++ fdt/fdt_ro.c | 898 
+++++++ fdt/fdt_rw.c | 476 ++++ fdt/fdt_strerror.c | 59 + fdt/fdt_sw.c | 376 +++ fdt/fdt_wip.c | 94 + fdt/libfdt.h | 2077 ++++++++++++++++ fdt/libfdt_env.h | 97 + fdt/libfdt_internal.h | 51 + fesvr/context.cc | 115 + fesvr/context.h | 54 + fesvr/debug_defines.h | 1418 +++++++++++ fesvr/device.cc | 155 ++ fesvr/device.h | 118 + fesvr/dtm.cc | 645 +++++ fesvr/dtm.h | 115 + fesvr/dummy.cc | 4 + fesvr/elf.h | 132 + fesvr/elf2hex.cc | 47 + fesvr/elfloader.cc | 94 + fesvr/elfloader.h | 13 + fesvr/fesvr.ac | 1 + fesvr/fesvr.mk.in | 40 + fesvr/fesvr.pc.in | 26 + fesvr/htif.cc | 371 +++ fesvr/htif.h | 126 + fesvr/htif_hexwriter.cc | 76 + fesvr/htif_hexwriter.h | 32 + fesvr/htif_pthread.cc | 66 + fesvr/htif_pthread.h | 38 + fesvr/memif.cc | 183 ++ fesvr/memif.h | 62 + fesvr/option_parser.cc | 51 + fesvr/option_parser.h | 31 + fesvr/rfb.cc | 230 ++ fesvr/rfb.h | 53 + fesvr/syscall.cc | 395 +++ fesvr/syscall.h | 72 + fesvr/term.cc | 53 + fesvr/term.h | 11 + fesvr/tsi.cc | 115 + fesvr/tsi.h | 57 + riscv-disasm.pc.in | 11 + riscv-dummy_rocc.pc.in | 11 - riscv-fesvr.pc.in | 11 + riscv-riscv.pc.in | 11 - riscv-softfloat.pc.in | 11 - riscv-spike.pc.in | 10 - riscv-spike_main.pc.in | 12 - riscv/arith.h | 123 + riscv/byteorder.h | 30 + riscv/clint.cc | 24 +- riscv/common.h | 2 + riscv/debug_defines.h | 735 ++++-- riscv/debug_module.cc | 375 ++- riscv/debug_module.h | 53 +- riscv/decode.h | 2154 ++++++++++++++++- riscv/devices.cc | 40 + riscv/devices.h | 21 +- riscv/disasm.h | 42 +- riscv/dts.cc | 123 +- riscv/dts.h | 8 + riscv/encoding.h | 1472 +---------- riscv/execute.cc | 232 +- riscv/extension.h | 1 - riscv/extensions.cc | 19 +- riscv/gen_icache | 5 +- riscv/insn_template.h | 2 +- riscv/insns/c_ebreak.h | 2 +- riscv/insns/csrrc.h | 2 +- riscv/insns/csrrci.h | 2 +- riscv/insns/csrrs.h | 2 +- riscv/insns/csrrsi.h | 2 +- riscv/insns/csrrw.h | 2 +- riscv/insns/csrrwi.h | 2 +- riscv/insns/dret.h | 4 +- riscv/insns/ebreak.h | 2 +- riscv/insns/ecall.h | 6 +- riscv/insns/fadd_h.h | 5 + 
riscv/insns/fclass_h.h | 3 + riscv/insns/fcvt_d_h.h | 6 + riscv/insns/fcvt_h_d.h | 6 + riscv/insns/fcvt_h_l.h | 6 + riscv/insns/fcvt_h_lu.h | 6 + riscv/insns/fcvt_h_q.h | 6 + riscv/insns/fcvt_h_s.h | 5 + riscv/insns/fcvt_h_w.h | 5 + riscv/insns/fcvt_h_wu.h | 5 + riscv/insns/fcvt_l_h.h | 6 + riscv/insns/fcvt_lu_h.h | 6 + riscv/insns/fcvt_q_h.h | 6 + riscv/insns/fcvt_s_h.h | 5 + riscv/insns/fcvt_w_h.h | 5 + riscv/insns/fcvt_wu_h.h | 5 + riscv/insns/fdiv_h.h | 5 + riscv/insns/feq_h.h | 4 + riscv/insns/fle_h.h | 4 + riscv/insns/flh.h | 3 + riscv/insns/flt_h.h | 4 + riscv/insns/fmadd_h.h | 5 + riscv/insns/fmax_h.h | 4 + riscv/insns/fmin_h.h | 4 + riscv/insns/fmsub_h.h | 5 + riscv/insns/fmul_h.h | 5 + riscv/insns/fmv_h_x.h | 3 + riscv/insns/fmv_x_h.h | 3 + riscv/insns/fnmadd_h.h | 5 + riscv/insns/fnmsub_h.h | 5 + riscv/insns/fsgnj_h.h | 3 + riscv/insns/fsgnjn_h.h | 3 + riscv/insns/fsgnjx_h.h | 3 + riscv/insns/fsh.h | 3 + riscv/insns/fsqrt_h.h | 5 + riscv/insns/fsub_h.h | 5 + riscv/insns/hfence_gvma.h | 4 + riscv/insns/hfence_vvma.h | 4 + riscv/insns/hlv_b.h | 4 + riscv/insns/hlv_bu.h | 4 + riscv/insns/hlv_d.h | 5 + riscv/insns/hlv_h.h | 4 + riscv/insns/hlv_hu.h | 4 + riscv/insns/hlv_w.h | 4 + riscv/insns/hlv_wu.h | 5 + riscv/insns/hlvx_hu.h | 4 + riscv/insns/hlvx_wu.h | 4 + riscv/insns/hsv_b.h | 4 + riscv/insns/hsv_d.h | 5 + riscv/insns/hsv_h.h | 4 + riscv/insns/hsv_w.h | 4 + riscv/insns/lr_d.h | 3 +- riscv/insns/lr_w.h | 3 +- riscv/insns/mret.h | 4 +- riscv/insns/p_abs.h | 4 + riscv/insns/p_beqimm.h | 2 + riscv/insns/p_bneimm.h | 2 + riscv/insns/p_clip.h | 9 + riscv/insns/p_clipr.h | 6 + riscv/insns/p_clipu.h | 8 + riscv/insns/p_clipur.h | 6 + riscv/insns/p_extbs.h | 1 + riscv/insns/p_extbz.h | 1 + riscv/insns/p_exths.h | 1 + riscv/insns/p_exthz.h | 1 + riscv/insns/p_lb_irpost.h | 2 + riscv/insns/p_lb_rr.h | 1 + riscv/insns/p_lb_rrpost.h | 2 + riscv/insns/p_lbu_irpost.h | 2 + riscv/insns/p_lbu_rr.h | 1 + riscv/insns/p_lbu_rrpost.h | 2 + riscv/insns/p_lh_irpost.h | 2 + 
riscv/insns/p_lh_rr.h | 1 + riscv/insns/p_lh_rrpost.h | 2 + riscv/insns/p_lhu_irpost.h | 2 + riscv/insns/p_lhu_rr.h | 1 + riscv/insns/p_lhu_rrpost.h | 2 + riscv/insns/p_lw_irpost.h | 2 + riscv/insns/p_lw_rr.h | 1 + riscv/insns/p_lw_rrpost.h | 2 + riscv/insns/p_mac.h | 1 + riscv/insns/p_max.h | 4 + riscv/insns/p_maxu.h | 4 + riscv/insns/p_min.h | 4 + riscv/insns/p_minu.h | 4 + riscv/insns/p_msu.h | 1 + riscv/insns/p_sb_irpost.h | 2 + riscv/insns/p_sb_rr.h | 1 + riscv/insns/p_sb_rrpost.h | 2 + riscv/insns/p_sh_irpost.h | 2 + riscv/insns/p_sh_rr.h | 1 + riscv/insns/p_sh_rrpost.h | 2 + riscv/insns/p_slet.h | 1 + riscv/insns/p_sletu.h | 1 + riscv/insns/p_sw_irpost.h | 2 + riscv/insns/p_sw_rr.h | 1 + riscv/insns/p_sw_rrpost.h | 2 + riscv/insns/pv_abs_b.h | 9 + riscv/insns/pv_abs_h.h | 9 + riscv/insns/pv_add_b.h | 9 + riscv/insns/pv_add_h.h | 9 + riscv/insns/pv_add_sc_b.h | 9 + riscv/insns/pv_add_sc_h.h | 9 + riscv/insns/pv_add_sci_b.h | 9 + riscv/insns/pv_add_sci_h.h | 9 + riscv/insns/pv_and_b.h | 9 + riscv/insns/pv_and_h.h | 9 + riscv/insns/pv_and_sc_b.h | 9 + riscv/insns/pv_and_sc_h.h | 9 + riscv/insns/pv_and_sci_b.h | 9 + riscv/insns/pv_and_sci_h.h | 9 + riscv/insns/pv_avg_b.h | 9 + riscv/insns/pv_avg_h.h | 9 + riscv/insns/pv_avg_sc_b.h | 9 + riscv/insns/pv_avg_sc_h.h | 9 + riscv/insns/pv_avg_sci_b.h | 9 + riscv/insns/pv_avg_sci_h.h | 9 + riscv/insns/pv_avgu_b.h | 9 + riscv/insns/pv_avgu_h.h | 9 + riscv/insns/pv_avgu_sc_b.h | 9 + riscv/insns/pv_avgu_sc_h.h | 9 + riscv/insns/pv_avgu_sci_b.h | 9 + riscv/insns/pv_avgu_sci_h.h | 9 + riscv/insns/pv_dotsp_b.h | 6 + riscv/insns/pv_dotsp_h.h | 6 + riscv/insns/pv_dotsp_sc_b.h | 6 + riscv/insns/pv_dotsp_sc_h.h | 6 + riscv/insns/pv_dotsp_sci_b.h | 6 + riscv/insns/pv_dotsp_sci_h.h | 6 + riscv/insns/pv_dotup_b.h | 6 + riscv/insns/pv_dotup_h.h | 6 + riscv/insns/pv_dotup_sc_b.h | 6 + riscv/insns/pv_dotup_sc_h.h | 6 + riscv/insns/pv_dotup_sci_b.h | 6 + riscv/insns/pv_dotup_sci_h.h | 6 + riscv/insns/pv_dotusp_b.h | 6 + 
riscv/insns/pv_dotusp_h.h | 6 + riscv/insns/pv_dotusp_sc_b.h | 6 + riscv/insns/pv_dotusp_sc_h.h | 6 + riscv/insns/pv_dotusp_sci_b.h | 6 + riscv/insns/pv_dotusp_sci_h.h | 6 + riscv/insns/pv_extract_b.h | 1 + riscv/insns/pv_extract_h.h | 1 + riscv/insns/pv_extractu_b.h | 1 + riscv/insns/pv_extractu_h.h | 1 + riscv/insns/pv_insert_b.h | 6 + riscv/insns/pv_insert_h.h | 6 + riscv/insns/pv_max_b.h | 9 + riscv/insns/pv_max_h.h | 9 + riscv/insns/pv_max_sc_b.h | 9 + riscv/insns/pv_max_sc_h.h | 9 + riscv/insns/pv_max_sci_b.h | 9 + riscv/insns/pv_max_sci_h.h | 9 + riscv/insns/pv_maxu_b.h | 9 + riscv/insns/pv_maxu_h.h | 9 + riscv/insns/pv_maxu_sc_b.h | 9 + riscv/insns/pv_maxu_sc_h.h | 9 + riscv/insns/pv_maxu_sci_b.h | 10 + riscv/insns/pv_maxu_sci_h.h | 9 + riscv/insns/pv_min_b.h | 9 + riscv/insns/pv_min_h.h | 9 + riscv/insns/pv_min_sc_b.h | 9 + riscv/insns/pv_min_sc_h.h | 9 + riscv/insns/pv_min_sci_b.h | 9 + riscv/insns/pv_min_sci_h.h | 9 + riscv/insns/pv_minu_b.h | 9 + riscv/insns/pv_minu_h.h | 9 + riscv/insns/pv_minu_sc_b.h | 9 + riscv/insns/pv_minu_sc_h.h | 9 + riscv/insns/pv_minu_sci_b.h | 9 + riscv/insns/pv_minu_sci_h.h | 9 + riscv/insns/pv_or_b.h | 9 + riscv/insns/pv_or_h.h | 9 + riscv/insns/pv_or_sc_b.h | 9 + riscv/insns/pv_or_sc_h.h | 9 + riscv/insns/pv_or_sci_b.h | 9 + riscv/insns/pv_or_sci_h.h | 9 + riscv/insns/pv_sdotsp_b.h | 6 + riscv/insns/pv_sdotsp_h.h | 6 + riscv/insns/pv_sdotsp_sc_b.h | 6 + riscv/insns/pv_sdotsp_sc_h.h | 6 + riscv/insns/pv_sdotsp_sci_b.h | 6 + riscv/insns/pv_sdotsp_sci_h.h | 6 + riscv/insns/pv_sdotup_b.h | 6 + riscv/insns/pv_sdotup_h.h | 6 + riscv/insns/pv_sdotup_sc_b.h | 6 + riscv/insns/pv_sdotup_sc_h.h | 6 + riscv/insns/pv_sdotup_sci_b.h | 6 + riscv/insns/pv_sdotup_sci_h.h | 6 + riscv/insns/pv_sdotusp_b.h | 6 + riscv/insns/pv_sdotusp_h.h | 6 + riscv/insns/pv_sdotusp_sc_b.h | 6 + riscv/insns/pv_sdotusp_sc_h.h | 6 + riscv/insns/pv_sdotusp_sci_b.h | 6 + riscv/insns/pv_sdotusp_sci_h.h | 6 + riscv/insns/pv_shuffle2_b.h | 14 + 
riscv/insns/pv_shuffle2_h.h | 14 + riscv/insns/pv_sll_b.h | 9 + riscv/insns/pv_sll_h.h | 9 + riscv/insns/pv_sll_sc_b.h | 9 + riscv/insns/pv_sll_sc_h.h | 9 + riscv/insns/pv_sll_sci_b.h | 9 + riscv/insns/pv_sll_sci_h.h | 9 + riscv/insns/pv_sra_b.h | 9 + riscv/insns/pv_sra_h.h | 9 + riscv/insns/pv_sra_sc_b.h | 9 + riscv/insns/pv_sra_sc_h.h | 9 + riscv/insns/pv_sra_sci_b.h | 9 + riscv/insns/pv_sra_sci_h.h | 9 + riscv/insns/pv_srl_b.h | 9 + riscv/insns/pv_srl_h.h | 9 + riscv/insns/pv_srl_sc_b.h | 9 + riscv/insns/pv_srl_sc_h.h | 9 + riscv/insns/pv_srl_sci_b.h | 9 + riscv/insns/pv_srl_sci_h.h | 9 + riscv/insns/pv_sub_b.h | 9 + riscv/insns/pv_sub_h.h | 9 + riscv/insns/pv_sub_sc_b.h | 9 + riscv/insns/pv_sub_sc_h.h | 9 + riscv/insns/pv_sub_sci_b.h | 9 + riscv/insns/pv_sub_sci_h.h | 9 + riscv/insns/pv_xor_b.h | 9 + riscv/insns/pv_xor_h.h | 9 + riscv/insns/pv_xor_sc_b.h | 9 + riscv/insns/pv_xor_sc_h.h | 9 + riscv/insns/pv_xor_sci_b.h | 9 + riscv/insns/pv_xor_sci_h.h | 9 + riscv/insns/sc_d.h | 12 +- riscv/insns/sc_w.h | 12 +- riscv/insns/sfence_vma.h | 8 +- riscv/insns/sret.h | 17 +- riscv/insns/vaadd_vv.h | 2 + riscv/insns/vaadd_vx.h | 2 + riscv/insns/vaaddu_vv.h | 2 + riscv/insns/vaaddu_vx.h | 2 + riscv/insns/vadc_vim.h | 10 + riscv/insns/vadc_vvm.h | 10 + riscv/insns/vadc_vxm.h | 10 + riscv/insns/vadd_vi.h | 5 + riscv/insns/vadd_vv.h | 5 + riscv/insns/vadd_vx.h | 5 + riscv/insns/vamoaddei16_v.h | 2 + riscv/insns/vamoaddei32_v.h | 2 + riscv/insns/vamoaddei64_v.h | 2 + riscv/insns/vamoaddei8_v.h | 2 + riscv/insns/vamoandei16_v.h | 2 + riscv/insns/vamoandei32_v.h | 2 + riscv/insns/vamoandei64_v.h | 2 + riscv/insns/vamoandei8_v.h | 2 + riscv/insns/vamomaxei16_v.h | 2 + riscv/insns/vamomaxei32_v.h | 2 + riscv/insns/vamomaxei64_v.h | 2 + riscv/insns/vamomaxei8_v.h | 2 + riscv/insns/vamomaxuei16_v.h | 2 + riscv/insns/vamomaxuei32_v.h | 2 + riscv/insns/vamomaxuei64_v.h | 2 + riscv/insns/vamomaxuei8_v.h | 2 + riscv/insns/vamominei16_v.h | 2 + riscv/insns/vamominei32_v.h | 2 + 
riscv/insns/vamominei64_v.h | 2 + riscv/insns/vamominei8_v.h | 2 + riscv/insns/vamominuei16_v.h | 2 + riscv/insns/vamominuei32_v.h | 2 + riscv/insns/vamominuei64_v.h | 2 + riscv/insns/vamominuei8_v.h | 2 + riscv/insns/vamoorei16_v.h | 2 + riscv/insns/vamoorei32_v.h | 2 + riscv/insns/vamoorei64_v.h | 2 + riscv/insns/vamoorei8_v.h | 2 + riscv/insns/vamoswapei16_v.h | 2 + riscv/insns/vamoswapei32_v.h | 2 + riscv/insns/vamoswapei64_v.h | 2 + riscv/insns/vamoswapei8_v.h | 2 + riscv/insns/vamoxorei16_v.h | 2 + riscv/insns/vamoxorei32_v.h | 2 + riscv/insns/vamoxorei64_v.h | 2 + riscv/insns/vamoxorei8_v.h | 2 + riscv/insns/vand_vi.h | 5 + riscv/insns/vand_vv.h | 5 + riscv/insns/vand_vx.h | 5 + riscv/insns/vasub_vv.h | 2 + riscv/insns/vasub_vx.h | 2 + riscv/insns/vasubu_vv.h | 2 + riscv/insns/vasubu_vx.h | 2 + riscv/insns/vcompress_vm.h | 33 + riscv/insns/vdiv_vv.h | 10 + riscv/insns/vdiv_vx.h | 10 + riscv/insns/vdivu_vv.h | 8 + riscv/insns/vdivu_vx.h | 8 + riscv/insns/vdot_vv.h | 5 + riscv/insns/vdotu_vv.h | 5 + riscv/insns/vfadd_vf.h | 11 + riscv/insns/vfadd_vv.h | 11 + riscv/insns/vfclass_v.h | 11 + riscv/insns/vfcvt_f_x_v.h | 14 + riscv/insns/vfcvt_f_xu_v.h | 14 + riscv/insns/vfcvt_rtz_x_f_v.h | 11 + riscv/insns/vfcvt_rtz_xu_f_v.h | 11 + riscv/insns/vfcvt_x_f_v.h | 11 + riscv/insns/vfcvt_xu_f_v.h | 11 + riscv/insns/vfdiv_vf.h | 11 + riscv/insns/vfdiv_vv.h | 11 + riscv/insns/vfdot_vv.h | 11 + riscv/insns/vfirst_m.h | 20 + riscv/insns/vfmacc_vf.h | 11 + riscv/insns/vfmacc_vv.h | 11 + riscv/insns/vfmadd_vf.h | 11 + riscv/insns/vfmadd_vv.h | 11 + riscv/insns/vfmax_vf.h | 11 + riscv/insns/vfmax_vv.h | 11 + riscv/insns/vfmerge_vfm.h | 50 + riscv/insns/vfmin_vf.h | 11 + riscv/insns/vfmin_vv.h | 11 + riscv/insns/vfmsac_vf.h | 11 + riscv/insns/vfmsac_vv.h | 11 + riscv/insns/vfmsub_vf.h | 11 + riscv/insns/vfmsub_vv.h | 11 + riscv/insns/vfmul_vf.h | 11 + riscv/insns/vfmul_vv.h | 11 + riscv/insns/vfmv_f_s.h | 38 + riscv/insns/vfmv_s_f.h | 29 + riscv/insns/vfmv_v_f.h | 31 + 
riscv/insns/vfncvt_f_f_w.h | 23 + riscv/insns/vfncvt_f_x_w.h | 23 + riscv/insns/vfncvt_f_xu_w.h | 23 + riscv/insns/vfncvt_rod_f_f_w.h | 25 + riscv/insns/vfncvt_rtz_x_f_w.h | 24 + riscv/insns/vfncvt_rtz_xu_f_w.h | 24 + riscv/insns/vfncvt_x_f_w.h | 24 + riscv/insns/vfncvt_xu_f_w.h | 24 + riscv/insns/vfnmacc_vf.h | 11 + riscv/insns/vfnmacc_vv.h | 11 + riscv/insns/vfnmadd_vf.h | 11 + riscv/insns/vfnmadd_vv.h | 11 + riscv/insns/vfnmsac_vf.h | 11 + riscv/insns/vfnmsac_vv.h | 11 + riscv/insns/vfnmsub_vf.h | 11 + riscv/insns/vfnmsub_vv.h | 11 + riscv/insns/vfrdiv_vf.h | 11 + riscv/insns/vfrece7_v.h | 11 + riscv/insns/vfredmax_vs.h | 12 + riscv/insns/vfredmin_vs.h | 12 + riscv/insns/vfredosum_vs.h | 12 + riscv/insns/vfredsum_vs.h | 12 + riscv/insns/vfrsqrte7_v.h | 11 + riscv/insns/vfrsub_vf.h | 11 + riscv/insns/vfsgnj_vf.h | 11 + riscv/insns/vfsgnj_vv.h | 11 + riscv/insns/vfsgnjn_vf.h | 11 + riscv/insns/vfsgnjn_vv.h | 11 + riscv/insns/vfsgnjx_vf.h | 11 + riscv/insns/vfsgnjx_vv.h | 11 + riscv/insns/vfslide1down_vf.h | 36 + riscv/insns/vfslide1up_vf.h | 36 + riscv/insns/vfsqrt_v.h | 11 + riscv/insns/vfsub_vf.h | 11 + riscv/insns/vfsub_vv.h | 11 + riscv/insns/vfwadd_vf.h | 8 + riscv/insns/vfwadd_vv.h | 8 + riscv/insns/vfwadd_wf.h | 8 + riscv/insns/vfwadd_wv.h | 8 + riscv/insns/vfwcvt_f_f_v.h | 23 + riscv/insns/vfwcvt_f_x_v.h | 24 + riscv/insns/vfwcvt_f_xu_v.h | 24 + riscv/insns/vfwcvt_rtz_x_f_v.h | 23 + riscv/insns/vfwcvt_rtz_xu_f_v.h | 23 + riscv/insns/vfwcvt_x_f_v.h | 23 + riscv/insns/vfwcvt_xu_f_v.h | 23 + riscv/insns/vfwmacc_vf.h | 8 + riscv/insns/vfwmacc_vv.h | 8 + riscv/insns/vfwmsac_vf.h | 8 + riscv/insns/vfwmsac_vv.h | 8 + riscv/insns/vfwmul_vf.h | 8 + riscv/insns/vfwmul_vv.h | 8 + riscv/insns/vfwnmacc_vf.h | 8 + riscv/insns/vfwnmacc_vv.h | 8 + riscv/insns/vfwnmsac_vf.h | 8 + riscv/insns/vfwnmsac_vv.h | 8 + riscv/insns/vfwredosum_vs.h | 9 + riscv/insns/vfwredsum_vs.h | 9 + riscv/insns/vfwsub_vf.h | 8 + riscv/insns/vfwsub_vv.h | 8 + riscv/insns/vfwsub_wf.h | 8 + 
riscv/insns/vfwsub_wv.h | 8 + riscv/insns/vid_v.h | 31 + riscv/insns/viota_m.h | 53 + riscv/insns/vl1re16_v.h | 2 + riscv/insns/vl1re32_v.h | 2 + riscv/insns/vl1re64_v.h | 2 + riscv/insns/vl1re8_v.h | 2 + riscv/insns/vl2re16_v.h | 2 + riscv/insns/vl2re32_v.h | 2 + riscv/insns/vl2re64_v.h | 2 + riscv/insns/vl2re8_v.h | 2 + riscv/insns/vl4re16_v.h | 2 + riscv/insns/vl4re32_v.h | 2 + riscv/insns/vl4re64_v.h | 2 + riscv/insns/vl4re8_v.h | 2 + riscv/insns/vl8re16_v.h | 2 + riscv/insns/vl8re32_v.h | 2 + riscv/insns/vl8re64_v.h | 2 + riscv/insns/vl8re8_v.h | 2 + riscv/insns/vle16_v.h | 2 + riscv/insns/vle16ff_v.h | 2 + riscv/insns/vle32_v.h | 2 + riscv/insns/vle32ff_v.h | 2 + riscv/insns/vle64_v.h | 2 + riscv/insns/vle64ff_v.h | 2 + riscv/insns/vle8_v.h | 2 + riscv/insns/vle8ff_v.h | 2 + riscv/insns/vlse16_v.h | 2 + riscv/insns/vlse32_v.h | 2 + riscv/insns/vlse64_v.h | 2 + riscv/insns/vlse8_v.h | 2 + riscv/insns/vlxei16_v.h | 2 + riscv/insns/vlxei32_v.h | 2 + riscv/insns/vlxei64_v.h | 3 + riscv/insns/vlxei8_v.h | 2 + riscv/insns/vmacc_vv.h | 5 + riscv/insns/vmacc_vx.h | 5 + riscv/insns/vmadc_vim.h | 13 + riscv/insns/vmadc_vvm.h | 13 + riscv/insns/vmadc_vxm.h | 13 + riscv/insns/vmadd_vv.h | 5 + riscv/insns/vmadd_vx.h | 5 + riscv/insns/vmand_mm.h | 2 + riscv/insns/vmandnot_mm.h | 2 + riscv/insns/vmax_vv.h | 10 + riscv/insns/vmax_vx.h | 10 + riscv/insns/vmaxu_vv.h | 9 + riscv/insns/vmaxu_vx.h | 9 + riscv/insns/vmerge_vim.h | 11 + riscv/insns/vmerge_vvm.h | 11 + riscv/insns/vmerge_vxm.h | 11 + riscv/insns/vmfeq_vf.h | 12 + riscv/insns/vmfeq_vv.h | 12 + riscv/insns/vmfge_vf.h | 12 + riscv/insns/vmfgt_vf.h | 12 + riscv/insns/vmfle_vf.h | 12 + riscv/insns/vmfle_vv.h | 12 + riscv/insns/vmflt_vf.h | 12 + riscv/insns/vmflt_vv.h | 12 + riscv/insns/vmfne_vf.h | 12 + riscv/insns/vmfne_vv.h | 12 + riscv/insns/vmin_vv.h | 11 + riscv/insns/vmin_vx.h | 11 + riscv/insns/vminu_vv.h | 9 + riscv/insns/vminu_vx.h | 10 + riscv/insns/vmnand_mm.h | 2 + riscv/insns/vmnor_mm.h | 2 + 
riscv/insns/vmor_mm.h | 2 + riscv/insns/vmornot_mm.h | 2 + riscv/insns/vmsbc_vvm.h | 13 + riscv/insns/vmsbc_vxm.h | 13 + riscv/insns/vmsbf_m.h | 32 + riscv/insns/vmseq_vi.h | 5 + riscv/insns/vmseq_vv.h | 6 + riscv/insns/vmseq_vx.h | 5 + riscv/insns/vmsgt_vi.h | 5 + riscv/insns/vmsgt_vx.h | 5 + riscv/insns/vmsgtu_vi.h | 5 + riscv/insns/vmsgtu_vx.h | 5 + riscv/insns/vmsif_m.h | 32 + riscv/insns/vmsle_vi.h | 5 + riscv/insns/vmsle_vv.h | 5 + riscv/insns/vmsle_vx.h | 5 + riscv/insns/vmsleu_vi.h | 5 + riscv/insns/vmsleu_vv.h | 5 + riscv/insns/vmsleu_vx.h | 5 + riscv/insns/vmslt_vv.h | 5 + riscv/insns/vmslt_vx.h | 5 + riscv/insns/vmsltu_vv.h | 5 + riscv/insns/vmsltu_vx.h | 5 + riscv/insns/vmsne_vi.h | 5 + riscv/insns/vmsne_vv.h | 5 + riscv/insns/vmsne_vx.h | 5 + riscv/insns/vmsof_m.h | 30 + riscv/insns/vmul_vv.h | 5 + riscv/insns/vmul_vx.h | 5 + riscv/insns/vmulh_vv.h | 5 + riscv/insns/vmulh_vx.h | 5 + riscv/insns/vmulhsu_vv.h | 38 + riscv/insns/vmulhsu_vx.h | 38 + riscv/insns/vmulhu_vv.h | 5 + riscv/insns/vmulhu_vx.h | 5 + riscv/insns/vmv1r_v.h | 2 + riscv/insns/vmv2r_v.h | 2 + riscv/insns/vmv4r_v.h | 2 + riscv/insns/vmv8r_v.h | 2 + riscv/insns/vmv_s_x.h | 29 + riscv/insns/vmv_v_i.h | 7 + riscv/insns/vmv_v_v.h | 7 + riscv/insns/vmv_v_x.h | 7 + riscv/insns/vmv_x_s.h | 31 + riscv/insns/vmvnfr_v.h | 27 + riscv/insns/vmxnor_mm.h | 2 + riscv/insns/vmxor_mm.h | 2 + riscv/insns/vnclip_wi.h | 25 + riscv/insns/vnclip_wv.h | 25 + riscv/insns/vnclip_wx.h | 25 + riscv/insns/vnclipu_wi.h | 23 + riscv/insns/vnclipu_wv.h | 22 + riscv/insns/vnclipu_wx.h | 22 + riscv/insns/vnmsac_vv.h | 5 + riscv/insns/vnmsac_vx.h | 5 + riscv/insns/vnmsub_vv.h | 5 + riscv/insns/vnmsub_vx.h | 5 + riscv/insns/vnsra_wi.h | 5 + riscv/insns/vnsra_wv.h | 5 + riscv/insns/vnsra_wx.h | 5 + riscv/insns/vnsrl_wi.h | 5 + riscv/insns/vnsrl_wv.h | 5 + riscv/insns/vnsrl_wx.h | 5 + riscv/insns/vor_vi.h | 5 + riscv/insns/vor_vv.h | 5 + riscv/insns/vor_vx.h | 5 + riscv/insns/vpopc_m.h | 23 + riscv/insns/vredand_vs.h | 5 + 
riscv/insns/vredmax_vs.h | 5 + riscv/insns/vredmaxu_vs.h | 5 + riscv/insns/vredmin_vs.h | 5 + riscv/insns/vredminu_vs.h | 5 + riscv/insns/vredor_vs.h | 5 + riscv/insns/vredsum_vs.h | 5 + riscv/insns/vredxor_vs.h | 5 + riscv/insns/vrem_vv.h | 11 + riscv/insns/vrem_vx.h | 10 + riscv/insns/vremu_vv.h | 8 + riscv/insns/vremu_vx.h | 8 + riscv/insns/vrgather_vi.h | 30 + riscv/insns/vrgather_vv.h | 32 + riscv/insns/vrgather_vx.h | 24 + riscv/insns/vrgatherei16_vv.h | 34 + riscv/insns/vrsub_vi.h | 5 + riscv/insns/vrsub_vx.h | 5 + riscv/insns/vs1r_v.h | 2 + riscv/insns/vs2r_v.h | 2 + riscv/insns/vs4r_v.h | 2 + riscv/insns/vs8r_v.h | 2 + riscv/insns/vsadd_vi.h | 28 + riscv/insns/vsadd_vv.h | 29 + riscv/insns/vsadd_vx.h | 28 + riscv/insns/vsaddu_vi.h | 11 + riscv/insns/vsaddu_vv.h | 11 + riscv/insns/vsaddu_vx.h | 12 + riscv/insns/vsbc_vvm.h | 10 + riscv/insns/vsbc_vxm.h | 10 + riscv/insns/vse16_v.h | 2 + riscv/insns/vse32_v.h | 2 + riscv/insns/vse64_v.h | 2 + riscv/insns/vse8_v.h | 2 + riscv/insns/vsetvl.h | 2 + riscv/insns/vsetvli.h | 2 + riscv/insns/vsext_vf2.h | 1 + riscv/insns/vsext_vf4.h | 1 + riscv/insns/vsext_vf8.h | 1 + riscv/insns/vslide1down_vx.h | 44 + riscv/insns/vslide1up_vx.h | 30 + riscv/insns/vslidedown_vi.h | 36 + riscv/insns/vslidedown_vx.h | 36 + riscv/insns/vslideup_vi.h | 31 + riscv/insns/vslideup_vx.h | 31 + riscv/insns/vsll_vi.h | 5 + riscv/insns/vsll_vv.h | 5 + riscv/insns/vsll_vx.h | 5 + riscv/insns/vsmul_vv.h | 32 + riscv/insns/vsmul_vx.h | 33 + riscv/insns/vsra_vi.h | 5 + riscv/insns/vsra_vv.h | 5 + riscv/insns/vsra_vx.h | 5 + riscv/insns/vsrl_vi.h | 5 + riscv/insns/vsrl_vv.h | 5 + riscv/insns/vsrl_vx.h | 5 + riscv/insns/vsse16_v.h | 2 + riscv/insns/vsse32_v.h | 2 + riscv/insns/vsse64_v.h | 2 + riscv/insns/vsse8_v.h | 2 + riscv/insns/vssra_vi.h | 10 + riscv/insns/vssra_vv.h | 10 + riscv/insns/vssra_vx.h | 10 + riscv/insns/vssrl_vi.h | 10 + riscv/insns/vssrl_vv.h | 10 + riscv/insns/vssrl_vx.h | 10 + riscv/insns/vssub_vv.h | 29 + 
riscv/insns/vssub_vx.h | 29 + riscv/insns/vssubu_vv.h | 30 + riscv/insns/vssubu_vx.h | 29 + riscv/insns/vsub_vv.h | 5 + riscv/insns/vsub_vx.h | 5 + riscv/insns/vsuxei16_v.h | 2 + riscv/insns/vsuxei32_v.h | 2 + riscv/insns/vsuxei64_v.h | 2 + riscv/insns/vsuxei8_v.h | 2 + riscv/insns/vsxei16_v.h | 2 + riscv/insns/vsxei32_v.h | 2 + riscv/insns/vsxei64_v.h | 2 + riscv/insns/vsxei8_v.h | 2 + riscv/insns/vwadd_vv.h | 6 + riscv/insns/vwadd_vx.h | 6 + riscv/insns/vwadd_wv.h | 6 + riscv/insns/vwadd_wx.h | 6 + riscv/insns/vwaddu_vv.h | 6 + riscv/insns/vwaddu_vx.h | 6 + riscv/insns/vwaddu_wv.h | 6 + riscv/insns/vwaddu_wx.h | 6 + riscv/insns/vwmacc_vv.h | 6 + riscv/insns/vwmacc_vx.h | 6 + riscv/insns/vwmaccsu_vv.h | 6 + riscv/insns/vwmaccsu_vx.h | 6 + riscv/insns/vwmaccu_vv.h | 6 + riscv/insns/vwmaccu_vx.h | 6 + riscv/insns/vwmaccus_vx.h | 6 + riscv/insns/vwmul_vv.h | 6 + riscv/insns/vwmul_vx.h | 6 + riscv/insns/vwmulsu_vv.h | 16 + riscv/insns/vwmulsu_vx.h | 16 + riscv/insns/vwmulu_vv.h | 6 + riscv/insns/vwmulu_vx.h | 6 + riscv/insns/vwredsum_vs.h | 5 + riscv/insns/vwredsumu_vs.h | 5 + riscv/insns/vwsub_vv.h | 6 + riscv/insns/vwsub_vx.h | 6 + riscv/insns/vwsub_wv.h | 6 + riscv/insns/vwsub_wx.h | 6 + riscv/insns/vwsubu_vv.h | 6 + riscv/insns/vwsubu_vx.h | 6 + riscv/insns/vwsubu_wv.h | 6 + riscv/insns/vwsubu_wx.h | 6 + riscv/insns/vxor_vi.h | 5 + riscv/insns/vxor_vv.h | 5 + riscv/insns/vxor_vx.h | 5 + riscv/insns/vzext_vf2.h | 1 + riscv/insns/vzext_vf4.h | 1 + riscv/insns/vzext_vf8.h | 1 + riscv/insns/wfi.h | 11 +- riscv/interactive.cc | 63 +- riscv/jtag_dtm.cc | 87 +- riscv/jtag_dtm.h | 9 +- riscv/log_file.h | 37 + riscv/mmio_plugin.h | 91 + riscv/mmu.cc | 205 +- riscv/mmu.h | 210 +- riscv/mulhi.h | 43 - riscv/opcodes.h | 7 +- riscv/processor.cc | 1331 ++++++++-- riscv/processor.h | 229 +- riscv/remote_bitbang.cc | 7 + riscv/riscv.ac | 29 +- riscv/riscv.mk.in | 953 +++++++- riscv/sim.cc | 180 +- riscv/sim.h | 49 +- riscv/simif.h | 3 + riscv/trap.h | 44 +- riscv/ust_tracer.cc | 
74 - riscv/ust_tracer.h | 19 - scripts/vcs-version.sh | 2 +- softfloat/f16_classify.c | 36 + softfloat/f16_to_i16.c | 57 + softfloat/f16_to_i8.c | 57 + softfloat/f16_to_ui16.c | 54 + softfloat/f16_to_ui8.c | 54 + softfloat/f32_to_i16.c | 57 + softfloat/f32_to_ui16.c | 53 + softfloat/fall_maxmin.c | 81 + softfloat/fall_reciprocal.c | 392 +++ softfloat/platform.h | 4 + softfloat/softfloat.h | 19 + softfloat/softfloat.mk.in | 13 + softfloat/specialize.h | 14 + {spike_main => spike_dasm}/spike-dasm.cc | 27 +- spike_dasm/spike_dasm.ac | 0 spike_dasm/spike_dasm.mk.in | 9 + spike_dasm/spike_dasm_option_parser.cc | 1 + spike_main/disasm.cc | 648 ----- spike_main/spike-log-parser.cc | 60 + spike_main/spike.cc | 279 ++- spike_main/spike_main.mk.in | 5 +- 786 files changed, 26171 insertions(+), 3657 deletions(-) create mode 100644 ChangeLog.md create mode 100644 VERSION create mode 100644 ax_append_flag.m4 create mode 100644 ax_append_link_flags.m4 create mode 100644 ax_check_link_flag.m4 create mode 100644 ax_require_defined.m4 create mode 100755 ci-tests/test-spike create mode 100644 customext/cflush.cc rename dummy_rocc/dummy_rocc.ac => customext/customext.ac (100%) create mode 100644 customext/customext.mk.in rename {dummy_rocc => customext}/dummy_rocc.cc (100%) rename {dummy_rocc => customext}/dummy_rocc_test.c (100%) create mode 100644 disasm/disasm.ac create mode 100644 disasm/disasm.cc create mode 100644 disasm/disasm.mk.in rename {riscv => disasm}/regnames.cc (75%) delete mode 100644 dummy_rocc/dummy_rocc.mk.in create mode 100644 fdt/fdt.ac create mode 100644 fdt/fdt.c create mode 100644 fdt/fdt.h create mode 100644 fdt/fdt.mk.in create mode 100644 fdt/fdt_addresses.c create mode 100644 fdt/fdt_empty_tree.c create mode 100644 fdt/fdt_overlay.c create mode 100644 fdt/fdt_ro.c create mode 100644 fdt/fdt_rw.c create mode 100644 fdt/fdt_strerror.c create mode 100644 fdt/fdt_sw.c create mode 100644 fdt/fdt_wip.c create mode 100644 fdt/libfdt.h create mode 100644 
fdt/libfdt_env.h create mode 100644 fdt/libfdt_internal.h create mode 100644 fesvr/context.cc create mode 100644 fesvr/context.h create mode 100644 fesvr/debug_defines.h create mode 100644 fesvr/device.cc create mode 100644 fesvr/device.h create mode 100644 fesvr/dtm.cc create mode 100644 fesvr/dtm.h create mode 100644 fesvr/dummy.cc create mode 100644 fesvr/elf.h create mode 100644 fesvr/elf2hex.cc create mode 100644 fesvr/elfloader.cc create mode 100644 fesvr/elfloader.h create mode 100644 fesvr/fesvr.ac create mode 100644 fesvr/fesvr.mk.in create mode 100644 fesvr/fesvr.pc.in create mode 100644 fesvr/htif.cc create mode 100644 fesvr/htif.h create mode 100644 fesvr/htif_hexwriter.cc create mode 100644 fesvr/htif_hexwriter.h create mode 100644 fesvr/htif_pthread.cc create mode 100644 fesvr/htif_pthread.h create mode 100644 fesvr/memif.cc create mode 100644 fesvr/memif.h create mode 100644 fesvr/option_parser.cc create mode 100644 fesvr/option_parser.h create mode 100644 fesvr/rfb.cc create mode 100644 fesvr/rfb.h create mode 100644 fesvr/syscall.cc create mode 100644 fesvr/syscall.h create mode 100644 fesvr/term.cc create mode 100644 fesvr/term.h create mode 100644 fesvr/tsi.cc create mode 100644 fesvr/tsi.h create mode 100644 riscv-disasm.pc.in delete mode 100644 riscv-dummy_rocc.pc.in create mode 100644 riscv-fesvr.pc.in delete mode 100644 riscv-riscv.pc.in delete mode 100644 riscv-softfloat.pc.in delete mode 100644 riscv-spike.pc.in delete mode 100644 riscv-spike_main.pc.in create mode 100644 riscv/arith.h create mode 100644 riscv/byteorder.h mode change 100644 => 120000 riscv/encoding.h create mode 100644 riscv/insns/fadd_h.h create mode 100644 riscv/insns/fclass_h.h create mode 100644 riscv/insns/fcvt_d_h.h create mode 100644 riscv/insns/fcvt_h_d.h create mode 100644 riscv/insns/fcvt_h_l.h create mode 100644 riscv/insns/fcvt_h_lu.h create mode 100644 riscv/insns/fcvt_h_q.h create mode 100644 riscv/insns/fcvt_h_s.h create mode 100644 riscv/insns/fcvt_h_w.h 
create mode 100644 riscv/insns/fcvt_h_wu.h create mode 100644 riscv/insns/fcvt_l_h.h create mode 100644 riscv/insns/fcvt_lu_h.h create mode 100644 riscv/insns/fcvt_q_h.h create mode 100644 riscv/insns/fcvt_s_h.h create mode 100644 riscv/insns/fcvt_w_h.h create mode 100644 riscv/insns/fcvt_wu_h.h create mode 100644 riscv/insns/fdiv_h.h create mode 100644 riscv/insns/feq_h.h create mode 100644 riscv/insns/fle_h.h create mode 100644 riscv/insns/flh.h create mode 100644 riscv/insns/flt_h.h create mode 100644 riscv/insns/fmadd_h.h create mode 100644 riscv/insns/fmax_h.h create mode 100644 riscv/insns/fmin_h.h create mode 100644 riscv/insns/fmsub_h.h create mode 100644 riscv/insns/fmul_h.h create mode 100644 riscv/insns/fmv_h_x.h create mode 100644 riscv/insns/fmv_x_h.h create mode 100644 riscv/insns/fnmadd_h.h create mode 100644 riscv/insns/fnmsub_h.h create mode 100644 riscv/insns/fsgnj_h.h create mode 100644 riscv/insns/fsgnjn_h.h create mode 100644 riscv/insns/fsgnjx_h.h create mode 100644 riscv/insns/fsh.h create mode 100644 riscv/insns/fsqrt_h.h create mode 100644 riscv/insns/fsub_h.h create mode 100644 riscv/insns/hfence_gvma.h create mode 100644 riscv/insns/hfence_vvma.h create mode 100644 riscv/insns/hlv_b.h create mode 100644 riscv/insns/hlv_bu.h create mode 100644 riscv/insns/hlv_d.h create mode 100644 riscv/insns/hlv_h.h create mode 100644 riscv/insns/hlv_hu.h create mode 100644 riscv/insns/hlv_w.h create mode 100644 riscv/insns/hlv_wu.h create mode 100644 riscv/insns/hlvx_hu.h create mode 100644 riscv/insns/hlvx_wu.h create mode 100644 riscv/insns/hsv_b.h create mode 100644 riscv/insns/hsv_d.h create mode 100644 riscv/insns/hsv_h.h create mode 100644 riscv/insns/hsv_w.h create mode 100644 riscv/insns/p_abs.h create mode 100644 riscv/insns/p_beqimm.h create mode 100644 riscv/insns/p_bneimm.h create mode 100644 riscv/insns/p_clip.h create mode 100644 riscv/insns/p_clipr.h create mode 100644 riscv/insns/p_clipu.h create mode 100644 riscv/insns/p_clipur.h create 
mode 100644 riscv/insns/p_extbs.h create mode 100644 riscv/insns/p_extbz.h create mode 100644 riscv/insns/p_exths.h create mode 100644 riscv/insns/p_exthz.h create mode 100644 riscv/insns/p_lb_irpost.h create mode 100644 riscv/insns/p_lb_rr.h create mode 100644 riscv/insns/p_lb_rrpost.h create mode 100644 riscv/insns/p_lbu_irpost.h create mode 100644 riscv/insns/p_lbu_rr.h create mode 100644 riscv/insns/p_lbu_rrpost.h create mode 100644 riscv/insns/p_lh_irpost.h create mode 100644 riscv/insns/p_lh_rr.h create mode 100644 riscv/insns/p_lh_rrpost.h create mode 100644 riscv/insns/p_lhu_irpost.h create mode 100644 riscv/insns/p_lhu_rr.h create mode 100644 riscv/insns/p_lhu_rrpost.h create mode 100644 riscv/insns/p_lw_irpost.h create mode 100644 riscv/insns/p_lw_rr.h create mode 100644 riscv/insns/p_lw_rrpost.h create mode 100644 riscv/insns/p_mac.h create mode 100644 riscv/insns/p_max.h create mode 100644 riscv/insns/p_maxu.h create mode 100644 riscv/insns/p_min.h create mode 100644 riscv/insns/p_minu.h create mode 100644 riscv/insns/p_msu.h create mode 100644 riscv/insns/p_sb_irpost.h create mode 100644 riscv/insns/p_sb_rr.h create mode 100644 riscv/insns/p_sb_rrpost.h create mode 100644 riscv/insns/p_sh_irpost.h create mode 100644 riscv/insns/p_sh_rr.h create mode 100644 riscv/insns/p_sh_rrpost.h create mode 100644 riscv/insns/p_slet.h create mode 100644 riscv/insns/p_sletu.h create mode 100644 riscv/insns/p_sw_irpost.h create mode 100644 riscv/insns/p_sw_rr.h create mode 100644 riscv/insns/p_sw_rrpost.h create mode 100644 riscv/insns/pv_abs_b.h create mode 100644 riscv/insns/pv_abs_h.h create mode 100644 riscv/insns/pv_add_b.h create mode 100644 riscv/insns/pv_add_h.h create mode 100644 riscv/insns/pv_add_sc_b.h create mode 100644 riscv/insns/pv_add_sc_h.h create mode 100644 riscv/insns/pv_add_sci_b.h create mode 100644 riscv/insns/pv_add_sci_h.h create mode 100644 riscv/insns/pv_and_b.h create mode 100644 riscv/insns/pv_and_h.h create mode 100644 
riscv/insns/pv_and_sc_b.h create mode 100644 riscv/insns/pv_and_sc_h.h create mode 100644 riscv/insns/pv_and_sci_b.h create mode 100644 riscv/insns/pv_and_sci_h.h create mode 100644 riscv/insns/pv_avg_b.h create mode 100644 riscv/insns/pv_avg_h.h create mode 100644 riscv/insns/pv_avg_sc_b.h create mode 100644 riscv/insns/pv_avg_sc_h.h create mode 100644 riscv/insns/pv_avg_sci_b.h create mode 100644 riscv/insns/pv_avg_sci_h.h create mode 100644 riscv/insns/pv_avgu_b.h create mode 100644 riscv/insns/pv_avgu_h.h create mode 100644 riscv/insns/pv_avgu_sc_b.h create mode 100644 riscv/insns/pv_avgu_sc_h.h create mode 100644 riscv/insns/pv_avgu_sci_b.h create mode 100644 riscv/insns/pv_avgu_sci_h.h create mode 100644 riscv/insns/pv_dotsp_b.h create mode 100644 riscv/insns/pv_dotsp_h.h create mode 100644 riscv/insns/pv_dotsp_sc_b.h create mode 100644 riscv/insns/pv_dotsp_sc_h.h create mode 100644 riscv/insns/pv_dotsp_sci_b.h create mode 100644 riscv/insns/pv_dotsp_sci_h.h create mode 100644 riscv/insns/pv_dotup_b.h create mode 100644 riscv/insns/pv_dotup_h.h create mode 100644 riscv/insns/pv_dotup_sc_b.h create mode 100644 riscv/insns/pv_dotup_sc_h.h create mode 100644 riscv/insns/pv_dotup_sci_b.h create mode 100644 riscv/insns/pv_dotup_sci_h.h create mode 100644 riscv/insns/pv_dotusp_b.h create mode 100644 riscv/insns/pv_dotusp_h.h create mode 100644 riscv/insns/pv_dotusp_sc_b.h create mode 100644 riscv/insns/pv_dotusp_sc_h.h create mode 100644 riscv/insns/pv_dotusp_sci_b.h create mode 100644 riscv/insns/pv_dotusp_sci_h.h create mode 100644 riscv/insns/pv_extract_b.h create mode 100644 riscv/insns/pv_extract_h.h create mode 100644 riscv/insns/pv_extractu_b.h create mode 100644 riscv/insns/pv_extractu_h.h create mode 100644 riscv/insns/pv_insert_b.h create mode 100644 riscv/insns/pv_insert_h.h create mode 100644 riscv/insns/pv_max_b.h create mode 100644 riscv/insns/pv_max_h.h create mode 100644 riscv/insns/pv_max_sc_b.h create mode 100644 riscv/insns/pv_max_sc_h.h create 
mode 100644 riscv/insns/pv_max_sci_b.h create mode 100644 riscv/insns/pv_max_sci_h.h create mode 100644 riscv/insns/pv_maxu_b.h create mode 100644 riscv/insns/pv_maxu_h.h create mode 100644 riscv/insns/pv_maxu_sc_b.h create mode 100644 riscv/insns/pv_maxu_sc_h.h create mode 100644 riscv/insns/pv_maxu_sci_b.h create mode 100644 riscv/insns/pv_maxu_sci_h.h create mode 100644 riscv/insns/pv_min_b.h create mode 100644 riscv/insns/pv_min_h.h create mode 100644 riscv/insns/pv_min_sc_b.h create mode 100644 riscv/insns/pv_min_sc_h.h create mode 100644 riscv/insns/pv_min_sci_b.h create mode 100644 riscv/insns/pv_min_sci_h.h create mode 100644 riscv/insns/pv_minu_b.h create mode 100644 riscv/insns/pv_minu_h.h create mode 100644 riscv/insns/pv_minu_sc_b.h create mode 100644 riscv/insns/pv_minu_sc_h.h create mode 100644 riscv/insns/pv_minu_sci_b.h create mode 100644 riscv/insns/pv_minu_sci_h.h create mode 100644 riscv/insns/pv_or_b.h create mode 100644 riscv/insns/pv_or_h.h create mode 100644 riscv/insns/pv_or_sc_b.h create mode 100644 riscv/insns/pv_or_sc_h.h create mode 100644 riscv/insns/pv_or_sci_b.h create mode 100644 riscv/insns/pv_or_sci_h.h create mode 100644 riscv/insns/pv_sdotsp_b.h create mode 100644 riscv/insns/pv_sdotsp_h.h create mode 100644 riscv/insns/pv_sdotsp_sc_b.h create mode 100644 riscv/insns/pv_sdotsp_sc_h.h create mode 100644 riscv/insns/pv_sdotsp_sci_b.h create mode 100644 riscv/insns/pv_sdotsp_sci_h.h create mode 100644 riscv/insns/pv_sdotup_b.h create mode 100644 riscv/insns/pv_sdotup_h.h create mode 100644 riscv/insns/pv_sdotup_sc_b.h create mode 100644 riscv/insns/pv_sdotup_sc_h.h create mode 100644 riscv/insns/pv_sdotup_sci_b.h create mode 100644 riscv/insns/pv_sdotup_sci_h.h create mode 100644 riscv/insns/pv_sdotusp_b.h create mode 100644 riscv/insns/pv_sdotusp_h.h create mode 100644 riscv/insns/pv_sdotusp_sc_b.h create mode 100644 riscv/insns/pv_sdotusp_sc_h.h create mode 100644 riscv/insns/pv_sdotusp_sci_b.h create mode 100644 
riscv/insns/pv_sdotusp_sci_h.h create mode 100644 riscv/insns/pv_shuffle2_b.h create mode 100644 riscv/insns/pv_shuffle2_h.h create mode 100644 riscv/insns/pv_sll_b.h create mode 100644 riscv/insns/pv_sll_h.h create mode 100644 riscv/insns/pv_sll_sc_b.h create mode 100644 riscv/insns/pv_sll_sc_h.h create mode 100644 riscv/insns/pv_sll_sci_b.h create mode 100644 riscv/insns/pv_sll_sci_h.h create mode 100644 riscv/insns/pv_sra_b.h create mode 100644 riscv/insns/pv_sra_h.h create mode 100644 riscv/insns/pv_sra_sc_b.h create mode 100644 riscv/insns/pv_sra_sc_h.h create mode 100644 riscv/insns/pv_sra_sci_b.h create mode 100644 riscv/insns/pv_sra_sci_h.h create mode 100644 riscv/insns/pv_srl_b.h create mode 100644 riscv/insns/pv_srl_h.h create mode 100644 riscv/insns/pv_srl_sc_b.h create mode 100644 riscv/insns/pv_srl_sc_h.h create mode 100644 riscv/insns/pv_srl_sci_b.h create mode 100644 riscv/insns/pv_srl_sci_h.h create mode 100644 riscv/insns/pv_sub_b.h create mode 100644 riscv/insns/pv_sub_h.h create mode 100644 riscv/insns/pv_sub_sc_b.h create mode 100644 riscv/insns/pv_sub_sc_h.h create mode 100644 riscv/insns/pv_sub_sci_b.h create mode 100644 riscv/insns/pv_sub_sci_h.h create mode 100644 riscv/insns/pv_xor_b.h create mode 100644 riscv/insns/pv_xor_h.h create mode 100644 riscv/insns/pv_xor_sc_b.h create mode 100644 riscv/insns/pv_xor_sc_h.h create mode 100644 riscv/insns/pv_xor_sci_b.h create mode 100644 riscv/insns/pv_xor_sci_h.h create mode 100644 riscv/insns/vaadd_vv.h create mode 100644 riscv/insns/vaadd_vx.h create mode 100644 riscv/insns/vaaddu_vv.h create mode 100644 riscv/insns/vaaddu_vx.h create mode 100644 riscv/insns/vadc_vim.h create mode 100644 riscv/insns/vadc_vvm.h create mode 100644 riscv/insns/vadc_vxm.h create mode 100644 riscv/insns/vadd_vi.h create mode 100644 riscv/insns/vadd_vv.h create mode 100644 riscv/insns/vadd_vx.h create mode 100644 riscv/insns/vamoaddei16_v.h create mode 100644 riscv/insns/vamoaddei32_v.h create mode 100644 
riscv/insns/vamoaddei64_v.h create mode 100644 riscv/insns/vamoaddei8_v.h create mode 100644 riscv/insns/vamoandei16_v.h create mode 100644 riscv/insns/vamoandei32_v.h create mode 100644 riscv/insns/vamoandei64_v.h create mode 100644 riscv/insns/vamoandei8_v.h create mode 100644 riscv/insns/vamomaxei16_v.h create mode 100644 riscv/insns/vamomaxei32_v.h create mode 100644 riscv/insns/vamomaxei64_v.h create mode 100644 riscv/insns/vamomaxei8_v.h create mode 100644 riscv/insns/vamomaxuei16_v.h create mode 100644 riscv/insns/vamomaxuei32_v.h create mode 100644 riscv/insns/vamomaxuei64_v.h create mode 100644 riscv/insns/vamomaxuei8_v.h create mode 100644 riscv/insns/vamominei16_v.h create mode 100644 riscv/insns/vamominei32_v.h create mode 100644 riscv/insns/vamominei64_v.h create mode 100644 riscv/insns/vamominei8_v.h create mode 100644 riscv/insns/vamominuei16_v.h create mode 100644 riscv/insns/vamominuei32_v.h create mode 100644 riscv/insns/vamominuei64_v.h create mode 100644 riscv/insns/vamominuei8_v.h create mode 100644 riscv/insns/vamoorei16_v.h create mode 100644 riscv/insns/vamoorei32_v.h create mode 100644 riscv/insns/vamoorei64_v.h create mode 100644 riscv/insns/vamoorei8_v.h create mode 100644 riscv/insns/vamoswapei16_v.h create mode 100644 riscv/insns/vamoswapei32_v.h create mode 100644 riscv/insns/vamoswapei64_v.h create mode 100644 riscv/insns/vamoswapei8_v.h create mode 100644 riscv/insns/vamoxorei16_v.h create mode 100644 riscv/insns/vamoxorei32_v.h create mode 100644 riscv/insns/vamoxorei64_v.h create mode 100644 riscv/insns/vamoxorei8_v.h create mode 100644 riscv/insns/vand_vi.h create mode 100644 riscv/insns/vand_vv.h create mode 100644 riscv/insns/vand_vx.h create mode 100644 riscv/insns/vasub_vv.h create mode 100644 riscv/insns/vasub_vx.h create mode 100644 riscv/insns/vasubu_vv.h create mode 100644 riscv/insns/vasubu_vx.h create mode 100644 riscv/insns/vcompress_vm.h create mode 100644 riscv/insns/vdiv_vv.h create mode 100644 riscv/insns/vdiv_vx.h 
create mode 100644 riscv/insns/vdivu_vv.h create mode 100644 riscv/insns/vdivu_vx.h create mode 100644 riscv/insns/vdot_vv.h create mode 100644 riscv/insns/vdotu_vv.h create mode 100644 riscv/insns/vfadd_vf.h create mode 100644 riscv/insns/vfadd_vv.h create mode 100644 riscv/insns/vfclass_v.h create mode 100644 riscv/insns/vfcvt_f_x_v.h create mode 100644 riscv/insns/vfcvt_f_xu_v.h create mode 100644 riscv/insns/vfcvt_rtz_x_f_v.h create mode 100644 riscv/insns/vfcvt_rtz_xu_f_v.h create mode 100644 riscv/insns/vfcvt_x_f_v.h create mode 100644 riscv/insns/vfcvt_xu_f_v.h create mode 100644 riscv/insns/vfdiv_vf.h create mode 100644 riscv/insns/vfdiv_vv.h create mode 100644 riscv/insns/vfdot_vv.h create mode 100644 riscv/insns/vfirst_m.h create mode 100644 riscv/insns/vfmacc_vf.h create mode 100644 riscv/insns/vfmacc_vv.h create mode 100644 riscv/insns/vfmadd_vf.h create mode 100644 riscv/insns/vfmadd_vv.h create mode 100644 riscv/insns/vfmax_vf.h create mode 100644 riscv/insns/vfmax_vv.h create mode 100644 riscv/insns/vfmerge_vfm.h create mode 100644 riscv/insns/vfmin_vf.h create mode 100644 riscv/insns/vfmin_vv.h create mode 100644 riscv/insns/vfmsac_vf.h create mode 100644 riscv/insns/vfmsac_vv.h create mode 100644 riscv/insns/vfmsub_vf.h create mode 100644 riscv/insns/vfmsub_vv.h create mode 100644 riscv/insns/vfmul_vf.h create mode 100644 riscv/insns/vfmul_vv.h create mode 100644 riscv/insns/vfmv_f_s.h create mode 100644 riscv/insns/vfmv_s_f.h create mode 100644 riscv/insns/vfmv_v_f.h create mode 100644 riscv/insns/vfncvt_f_f_w.h create mode 100644 riscv/insns/vfncvt_f_x_w.h create mode 100644 riscv/insns/vfncvt_f_xu_w.h create mode 100644 riscv/insns/vfncvt_rod_f_f_w.h create mode 100644 riscv/insns/vfncvt_rtz_x_f_w.h create mode 100644 riscv/insns/vfncvt_rtz_xu_f_w.h create mode 100644 riscv/insns/vfncvt_x_f_w.h create mode 100644 riscv/insns/vfncvt_xu_f_w.h create mode 100644 riscv/insns/vfnmacc_vf.h create mode 100644 riscv/insns/vfnmacc_vv.h create mode 100644 
riscv/insns/vfnmadd_vf.h create mode 100644 riscv/insns/vfnmadd_vv.h create mode 100644 riscv/insns/vfnmsac_vf.h create mode 100644 riscv/insns/vfnmsac_vv.h create mode 100644 riscv/insns/vfnmsub_vf.h create mode 100644 riscv/insns/vfnmsub_vv.h create mode 100644 riscv/insns/vfrdiv_vf.h create mode 100644 riscv/insns/vfrece7_v.h create mode 100644 riscv/insns/vfredmax_vs.h create mode 100644 riscv/insns/vfredmin_vs.h create mode 100644 riscv/insns/vfredosum_vs.h create mode 100644 riscv/insns/vfredsum_vs.h create mode 100644 riscv/insns/vfrsqrte7_v.h create mode 100644 riscv/insns/vfrsub_vf.h create mode 100644 riscv/insns/vfsgnj_vf.h create mode 100644 riscv/insns/vfsgnj_vv.h create mode 100644 riscv/insns/vfsgnjn_vf.h create mode 100644 riscv/insns/vfsgnjn_vv.h create mode 100644 riscv/insns/vfsgnjx_vf.h create mode 100644 riscv/insns/vfsgnjx_vv.h create mode 100644 riscv/insns/vfslide1down_vf.h create mode 100644 riscv/insns/vfslide1up_vf.h create mode 100644 riscv/insns/vfsqrt_v.h create mode 100644 riscv/insns/vfsub_vf.h create mode 100644 riscv/insns/vfsub_vv.h create mode 100644 riscv/insns/vfwadd_vf.h create mode 100644 riscv/insns/vfwadd_vv.h create mode 100644 riscv/insns/vfwadd_wf.h create mode 100644 riscv/insns/vfwadd_wv.h create mode 100644 riscv/insns/vfwcvt_f_f_v.h create mode 100644 riscv/insns/vfwcvt_f_x_v.h create mode 100644 riscv/insns/vfwcvt_f_xu_v.h create mode 100644 riscv/insns/vfwcvt_rtz_x_f_v.h create mode 100644 riscv/insns/vfwcvt_rtz_xu_f_v.h create mode 100644 riscv/insns/vfwcvt_x_f_v.h create mode 100644 riscv/insns/vfwcvt_xu_f_v.h create mode 100644 riscv/insns/vfwmacc_vf.h create mode 100644 riscv/insns/vfwmacc_vv.h create mode 100644 riscv/insns/vfwmsac_vf.h create mode 100644 riscv/insns/vfwmsac_vv.h create mode 100644 riscv/insns/vfwmul_vf.h create mode 100644 riscv/insns/vfwmul_vv.h create mode 100644 riscv/insns/vfwnmacc_vf.h create mode 100644 riscv/insns/vfwnmacc_vv.h create mode 100644 riscv/insns/vfwnmsac_vf.h create mode 
100644 riscv/insns/vfwnmsac_vv.h create mode 100644 riscv/insns/vfwredosum_vs.h create mode 100644 riscv/insns/vfwredsum_vs.h create mode 100644 riscv/insns/vfwsub_vf.h create mode 100644 riscv/insns/vfwsub_vv.h create mode 100644 riscv/insns/vfwsub_wf.h create mode 100644 riscv/insns/vfwsub_wv.h create mode 100644 riscv/insns/vid_v.h create mode 100644 riscv/insns/viota_m.h create mode 100644 riscv/insns/vl1re16_v.h create mode 100644 riscv/insns/vl1re32_v.h create mode 100644 riscv/insns/vl1re64_v.h create mode 100644 riscv/insns/vl1re8_v.h create mode 100644 riscv/insns/vl2re16_v.h create mode 100644 riscv/insns/vl2re32_v.h create mode 100644 riscv/insns/vl2re64_v.h create mode 100644 riscv/insns/vl2re8_v.h create mode 100644 riscv/insns/vl4re16_v.h create mode 100644 riscv/insns/vl4re32_v.h create mode 100644 riscv/insns/vl4re64_v.h create mode 100644 riscv/insns/vl4re8_v.h create mode 100644 riscv/insns/vl8re16_v.h create mode 100644 riscv/insns/vl8re32_v.h create mode 100644 riscv/insns/vl8re64_v.h create mode 100644 riscv/insns/vl8re8_v.h create mode 100644 riscv/insns/vle16_v.h create mode 100644 riscv/insns/vle16ff_v.h create mode 100644 riscv/insns/vle32_v.h create mode 100644 riscv/insns/vle32ff_v.h create mode 100644 riscv/insns/vle64_v.h create mode 100644 riscv/insns/vle64ff_v.h create mode 100644 riscv/insns/vle8_v.h create mode 100644 riscv/insns/vle8ff_v.h create mode 100644 riscv/insns/vlse16_v.h create mode 100644 riscv/insns/vlse32_v.h create mode 100644 riscv/insns/vlse64_v.h create mode 100644 riscv/insns/vlse8_v.h create mode 100644 riscv/insns/vlxei16_v.h create mode 100644 riscv/insns/vlxei32_v.h create mode 100644 riscv/insns/vlxei64_v.h create mode 100644 riscv/insns/vlxei8_v.h create mode 100644 riscv/insns/vmacc_vv.h create mode 100644 riscv/insns/vmacc_vx.h create mode 100644 riscv/insns/vmadc_vim.h create mode 100644 riscv/insns/vmadc_vvm.h create mode 100644 riscv/insns/vmadc_vxm.h create mode 100644 riscv/insns/vmadd_vv.h create 
mode 100644 riscv/insns/vmadd_vx.h create mode 100644 riscv/insns/vmand_mm.h create mode 100644 riscv/insns/vmandnot_mm.h create mode 100644 riscv/insns/vmax_vv.h create mode 100644 riscv/insns/vmax_vx.h create mode 100644 riscv/insns/vmaxu_vv.h create mode 100644 riscv/insns/vmaxu_vx.h create mode 100644 riscv/insns/vmerge_vim.h create mode 100644 riscv/insns/vmerge_vvm.h create mode 100644 riscv/insns/vmerge_vxm.h create mode 100644 riscv/insns/vmfeq_vf.h create mode 100644 riscv/insns/vmfeq_vv.h create mode 100644 riscv/insns/vmfge_vf.h create mode 100644 riscv/insns/vmfgt_vf.h create mode 100644 riscv/insns/vmfle_vf.h create mode 100644 riscv/insns/vmfle_vv.h create mode 100644 riscv/insns/vmflt_vf.h create mode 100644 riscv/insns/vmflt_vv.h create mode 100644 riscv/insns/vmfne_vf.h create mode 100644 riscv/insns/vmfne_vv.h create mode 100644 riscv/insns/vmin_vv.h create mode 100644 riscv/insns/vmin_vx.h create mode 100644 riscv/insns/vminu_vv.h create mode 100644 riscv/insns/vminu_vx.h create mode 100644 riscv/insns/vmnand_mm.h create mode 100644 riscv/insns/vmnor_mm.h create mode 100644 riscv/insns/vmor_mm.h create mode 100644 riscv/insns/vmornot_mm.h create mode 100644 riscv/insns/vmsbc_vvm.h create mode 100644 riscv/insns/vmsbc_vxm.h create mode 100644 riscv/insns/vmsbf_m.h create mode 100644 riscv/insns/vmseq_vi.h create mode 100644 riscv/insns/vmseq_vv.h create mode 100644 riscv/insns/vmseq_vx.h create mode 100644 riscv/insns/vmsgt_vi.h create mode 100644 riscv/insns/vmsgt_vx.h create mode 100644 riscv/insns/vmsgtu_vi.h create mode 100644 riscv/insns/vmsgtu_vx.h create mode 100644 riscv/insns/vmsif_m.h create mode 100644 riscv/insns/vmsle_vi.h create mode 100644 riscv/insns/vmsle_vv.h create mode 100644 riscv/insns/vmsle_vx.h create mode 100644 riscv/insns/vmsleu_vi.h create mode 100644 riscv/insns/vmsleu_vv.h create mode 100644 riscv/insns/vmsleu_vx.h create mode 100644 riscv/insns/vmslt_vv.h create mode 100644 riscv/insns/vmslt_vx.h create mode 100644 
riscv/insns/vmsltu_vv.h create mode 100644 riscv/insns/vmsltu_vx.h create mode 100644 riscv/insns/vmsne_vi.h create mode 100644 riscv/insns/vmsne_vv.h create mode 100644 riscv/insns/vmsne_vx.h create mode 100644 riscv/insns/vmsof_m.h create mode 100644 riscv/insns/vmul_vv.h create mode 100644 riscv/insns/vmul_vx.h create mode 100644 riscv/insns/vmulh_vv.h create mode 100644 riscv/insns/vmulh_vx.h create mode 100644 riscv/insns/vmulhsu_vv.h create mode 100644 riscv/insns/vmulhsu_vx.h create mode 100644 riscv/insns/vmulhu_vv.h create mode 100644 riscv/insns/vmulhu_vx.h create mode 100644 riscv/insns/vmv1r_v.h create mode 100644 riscv/insns/vmv2r_v.h create mode 100644 riscv/insns/vmv4r_v.h create mode 100644 riscv/insns/vmv8r_v.h create mode 100644 riscv/insns/vmv_s_x.h create mode 100644 riscv/insns/vmv_v_i.h create mode 100644 riscv/insns/vmv_v_v.h create mode 100644 riscv/insns/vmv_v_x.h create mode 100644 riscv/insns/vmv_x_s.h create mode 100644 riscv/insns/vmvnfr_v.h create mode 100644 riscv/insns/vmxnor_mm.h create mode 100644 riscv/insns/vmxor_mm.h create mode 100644 riscv/insns/vnclip_wi.h create mode 100644 riscv/insns/vnclip_wv.h create mode 100644 riscv/insns/vnclip_wx.h create mode 100644 riscv/insns/vnclipu_wi.h create mode 100644 riscv/insns/vnclipu_wv.h create mode 100644 riscv/insns/vnclipu_wx.h create mode 100644 riscv/insns/vnmsac_vv.h create mode 100644 riscv/insns/vnmsac_vx.h create mode 100644 riscv/insns/vnmsub_vv.h create mode 100644 riscv/insns/vnmsub_vx.h create mode 100644 riscv/insns/vnsra_wi.h create mode 100644 riscv/insns/vnsra_wv.h create mode 100644 riscv/insns/vnsra_wx.h create mode 100644 riscv/insns/vnsrl_wi.h create mode 100644 riscv/insns/vnsrl_wv.h create mode 100644 riscv/insns/vnsrl_wx.h create mode 100644 riscv/insns/vor_vi.h create mode 100644 riscv/insns/vor_vv.h create mode 100644 riscv/insns/vor_vx.h create mode 100644 riscv/insns/vpopc_m.h create mode 100644 riscv/insns/vredand_vs.h create mode 100644 
riscv/insns/vredmax_vs.h create mode 100644 riscv/insns/vredmaxu_vs.h create mode 100644 riscv/insns/vredmin_vs.h create mode 100644 riscv/insns/vredminu_vs.h create mode 100644 riscv/insns/vredor_vs.h create mode 100644 riscv/insns/vredsum_vs.h create mode 100644 riscv/insns/vredxor_vs.h create mode 100644 riscv/insns/vrem_vv.h create mode 100644 riscv/insns/vrem_vx.h create mode 100644 riscv/insns/vremu_vv.h create mode 100644 riscv/insns/vremu_vx.h create mode 100644 riscv/insns/vrgather_vi.h create mode 100644 riscv/insns/vrgather_vv.h create mode 100644 riscv/insns/vrgather_vx.h create mode 100644 riscv/insns/vrgatherei16_vv.h create mode 100644 riscv/insns/vrsub_vi.h create mode 100644 riscv/insns/vrsub_vx.h create mode 100644 riscv/insns/vs1r_v.h create mode 100644 riscv/insns/vs2r_v.h create mode 100644 riscv/insns/vs4r_v.h create mode 100644 riscv/insns/vs8r_v.h create mode 100644 riscv/insns/vsadd_vi.h create mode 100644 riscv/insns/vsadd_vv.h create mode 100644 riscv/insns/vsadd_vx.h create mode 100644 riscv/insns/vsaddu_vi.h create mode 100644 riscv/insns/vsaddu_vv.h create mode 100644 riscv/insns/vsaddu_vx.h create mode 100644 riscv/insns/vsbc_vvm.h create mode 100644 riscv/insns/vsbc_vxm.h create mode 100644 riscv/insns/vse16_v.h create mode 100644 riscv/insns/vse32_v.h create mode 100644 riscv/insns/vse64_v.h create mode 100644 riscv/insns/vse8_v.h create mode 100644 riscv/insns/vsetvl.h create mode 100644 riscv/insns/vsetvli.h create mode 100644 riscv/insns/vsext_vf2.h create mode 100644 riscv/insns/vsext_vf4.h create mode 100644 riscv/insns/vsext_vf8.h create mode 100644 riscv/insns/vslide1down_vx.h create mode 100644 riscv/insns/vslide1up_vx.h create mode 100644 riscv/insns/vslidedown_vi.h create mode 100644 riscv/insns/vslidedown_vx.h create mode 100644 riscv/insns/vslideup_vi.h create mode 100644 riscv/insns/vslideup_vx.h create mode 100644 riscv/insns/vsll_vi.h create mode 100644 riscv/insns/vsll_vv.h create mode 100644 riscv/insns/vsll_vx.h 
create mode 100644 riscv/insns/vsmul_vv.h create mode 100644 riscv/insns/vsmul_vx.h create mode 100644 riscv/insns/vsra_vi.h create mode 100644 riscv/insns/vsra_vv.h create mode 100644 riscv/insns/vsra_vx.h create mode 100644 riscv/insns/vsrl_vi.h create mode 100644 riscv/insns/vsrl_vv.h create mode 100644 riscv/insns/vsrl_vx.h create mode 100644 riscv/insns/vsse16_v.h create mode 100644 riscv/insns/vsse32_v.h create mode 100644 riscv/insns/vsse64_v.h create mode 100644 riscv/insns/vsse8_v.h create mode 100644 riscv/insns/vssra_vi.h create mode 100644 riscv/insns/vssra_vv.h create mode 100644 riscv/insns/vssra_vx.h create mode 100644 riscv/insns/vssrl_vi.h create mode 100644 riscv/insns/vssrl_vv.h create mode 100644 riscv/insns/vssrl_vx.h create mode 100644 riscv/insns/vssub_vv.h create mode 100644 riscv/insns/vssub_vx.h create mode 100644 riscv/insns/vssubu_vv.h create mode 100644 riscv/insns/vssubu_vx.h create mode 100644 riscv/insns/vsub_vv.h create mode 100644 riscv/insns/vsub_vx.h create mode 100644 riscv/insns/vsuxei16_v.h create mode 100644 riscv/insns/vsuxei32_v.h create mode 100644 riscv/insns/vsuxei64_v.h create mode 100644 riscv/insns/vsuxei8_v.h create mode 100644 riscv/insns/vsxei16_v.h create mode 100644 riscv/insns/vsxei32_v.h create mode 100644 riscv/insns/vsxei64_v.h create mode 100644 riscv/insns/vsxei8_v.h create mode 100644 riscv/insns/vwadd_vv.h create mode 100644 riscv/insns/vwadd_vx.h create mode 100644 riscv/insns/vwadd_wv.h create mode 100644 riscv/insns/vwadd_wx.h create mode 100644 riscv/insns/vwaddu_vv.h create mode 100644 riscv/insns/vwaddu_vx.h create mode 100644 riscv/insns/vwaddu_wv.h create mode 100644 riscv/insns/vwaddu_wx.h create mode 100644 riscv/insns/vwmacc_vv.h create mode 100644 riscv/insns/vwmacc_vx.h create mode 100644 riscv/insns/vwmaccsu_vv.h create mode 100644 riscv/insns/vwmaccsu_vx.h create mode 100644 riscv/insns/vwmaccu_vv.h create mode 100644 riscv/insns/vwmaccu_vx.h create mode 100644 riscv/insns/vwmaccus_vx.h 
create mode 100644 riscv/insns/vwmul_vv.h create mode 100644 riscv/insns/vwmul_vx.h create mode 100644 riscv/insns/vwmulsu_vv.h create mode 100644 riscv/insns/vwmulsu_vx.h create mode 100644 riscv/insns/vwmulu_vv.h create mode 100644 riscv/insns/vwmulu_vx.h create mode 100644 riscv/insns/vwredsum_vs.h create mode 100644 riscv/insns/vwredsumu_vs.h create mode 100644 riscv/insns/vwsub_vv.h create mode 100644 riscv/insns/vwsub_vx.h create mode 100644 riscv/insns/vwsub_wv.h create mode 100644 riscv/insns/vwsub_wx.h create mode 100644 riscv/insns/vwsubu_vv.h create mode 100644 riscv/insns/vwsubu_vx.h create mode 100644 riscv/insns/vwsubu_wv.h create mode 100644 riscv/insns/vwsubu_wx.h create mode 100644 riscv/insns/vxor_vi.h create mode 100644 riscv/insns/vxor_vv.h create mode 100644 riscv/insns/vxor_vx.h create mode 100644 riscv/insns/vzext_vf2.h create mode 100644 riscv/insns/vzext_vf4.h create mode 100644 riscv/insns/vzext_vf8.h create mode 100644 riscv/log_file.h create mode 100644 riscv/mmio_plugin.h delete mode 100644 riscv/mulhi.h delete mode 100644 riscv/ust_tracer.cc delete mode 100644 riscv/ust_tracer.h create mode 100755 softfloat/f16_classify.c create mode 100644 softfloat/f16_to_i16.c create mode 100644 softfloat/f16_to_i8.c create mode 100644 softfloat/f16_to_ui16.c create mode 100644 softfloat/f16_to_ui8.c create mode 100644 softfloat/f32_to_i16.c create mode 100644 softfloat/f32_to_ui16.c create mode 100644 softfloat/fall_maxmin.c create mode 100644 softfloat/fall_reciprocal.c rename {spike_main => spike_dasm}/spike-dasm.cc (70%) create mode 100644 spike_dasm/spike_dasm.ac create mode 100644 spike_dasm/spike_dasm.mk.in create mode 120000 spike_dasm/spike_dasm_option_parser.cc delete mode 100644 spike_main/disasm.cc create mode 100644 spike_main/spike-log-parser.cc diff --git a/ChangeLog.md b/ChangeLog.md new file mode 100644 index 0000000000..2be2d4adbe --- /dev/null +++ b/ChangeLog.md @@ -0,0 +1,21 @@ +Version 1.0.1-dev +----------------- +- Preliminary 
support for a subset of the Vector Extension, v0.7.1. +- Support S-mode vectored interrupts (i.e. `stvec[0]` is now writable). +- Added support for dynamic linking of libraries containing MMIO devices. +- Added `--priv` flag to control which privilege modes are available. +- When the commit log is enabled at configure time (`--enable-commitlog`), + it must also be enabled at runtime with the `--log-commits` option. +- Several debug-related additions and changes: + - Added `hasel` debug feature. + - Added `--dm-no-abstract-csr` command-line option. + - Added `--dm-no-halt-groups` command-line option. + - Renamed `--progsize` to `--dm-progsize`. + - Renamed `--debug-sba` to `--dm-sba`. + - Renamed `--debug-auth` to `--dm-auth`. + - Renamed `--abstract-rti` to `--dm-abstract-rti`. + - Renamed `--without-hasel` to `--dm-no-hasel`. + +Version 1.0.0 (2019-03-30) +-------------------------- +- First versioned release. diff --git a/Makefile.in b/Makefile.in index c09fc50588..66e8df08c5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -36,6 +36,9 @@ project_name := @PACKAGE_TARNAME@ src_dir := @srcdir@ scripts_dir := $(src_dir)/scripts +HAVE_INT128 := @HAVE_INT128@ +HAVE_DLOPEN := @HAVE_DLOPEN@ + # If the version information is not in the configure script, then we # assume that we are in a working directory.
We use the vcs-version.sh # script in the scripts directory to generate an appropriate version @@ -50,17 +53,11 @@ endif # Installation directories -prefix := @prefix@ -enable_stow := @enable_stow@ +prefix ?= @prefix@ -ifeq ($(enable_stow),yes) - stow_pkg_dir := $(prefix)/pkgs - INSTALLDIR ?= $(DESTDIR)$(stow_pkg_dir)/$(project_name)-$(project_ver) -else - INSTALLDIR ?= $(DESTDIR)$(prefix) -endif +INSTALLDIR ?= $(DESTDIR)$(prefix) -install_hdrs_dir := $(INSTALLDIR)/include/$(project_name) +install_hdrs_dir := $(INSTALLDIR)/include install_libs_dir := $(INSTALLDIR)/lib install_exes_dir := $(INSTALLDIR)/bin @@ -81,25 +78,52 @@ VPATH := $(addprefix $(src_dir)/, $(sprojs_enabled)) # C++ compiler # - CPPFLAGS : flags for the preprocessor (eg. -I,-D) # - CXXFLAGS : flags for C++ compiler (eg. -Wall,-g,-O3) +# +# To allow a user to specify CFLAGS or similar as part of the Make +# command, we also have mcppbs-CFLAGS etc. with stuff that shouldn't be +# lost in such a case. +# +# The order of precedence (highest to lowest) is then: +# +# - Specified as part of Make command line +# - Specified as part of running configure +# - Specified here (default-CFLAGS) +# +# These all appear on the command line, from lowest precedence to +# highest. + +default-CFLAGS := -DPREFIX=\"$(prefix)\" -Wall -Wno-unused -g -O2 +default-CXXFLAGS := $(default-CFLAGS) -std=c++11 + +mcppbs-CPPFLAGS := @CPPFLAGS@ +mcppbs-CFLAGS := $(default-CFLAGS) @CFLAGS@ +mcppbs-CXXFLAGS := $(default-CXXFLAGS) @CXXFLAGS@ CC := @CC@ CXX := @CXX@ -CFLAGS += @CFLAGS@ -DPREFIX=\"$(prefix)\" -CPPFLAGS += @CPPFLAGS@ -CXXFLAGS += @CXXFLAGS@ -DPREFIX=\"$(prefix)\" -COMPILE := $(CXX) -fPIC -MMD -MP $(CPPFLAGS) $(CXXFLAGS) \ - $(sprojs_include) -COMPILE_C := $(CC) -fPIC -MMD -MP $(CPPFLAGS) $(CFLAGS) \ - $(sprojs_include) + +# These are the flags actually used for a C++ compile or a C compile. +# The language-specific flags come after the preprocessor flags, but +# user-supplied flags always take precedence.
+all-cxx-flags := \ + $(mcppbs-CPPFLAGS) $(mcppbs-CXXFLAGS) $(CPPFLAGS) $(CXXFLAGS) +all-c-flags := \ + $(mcppbs-CPPFLAGS) $(mcppbs-CFLAGS) $(CPPFLAGS) $(CFLAGS) + +COMPILE := $(CXX) -MMD -MP $(all-cxx-flags) $(sprojs_include) +COMPILE_C := $(CC) -MMD -MP $(all-c-flags) $(sprojs_include) + # Linker # - LDFLAGS : Flags for the linker (eg. -L) # - LIBS : Library flags (eg. -l) +mcppbs-LDFLAGS := @LDFLAGS@ +all-link-flags := $(mcppbs-LDFLAGS) $(LDFLAGS) + comma := , LD := $(CXX) -LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ -LINK := $(LD) -L. $(LDFLAGS) -Wl,-rpath,$(install_libs_dir) $(patsubst -L%,-Wl$(comma)-rpath$(comma)%,$(filter -L%,$(LDFLAGS))) +LINK := $(LD) -L. $(all-link-flags) -Wl,-rpath,$(install_libs_dir) $(patsubst -L%,-Wl$(comma)-rpath$(comma)%,$(filter -L%,$(LDFLAGS))) # Library creation @@ -115,9 +139,9 @@ RUNFLAGS := @RUNFLAGS@ MKINSTALLDIRS := $(scripts_dir)/mk-install-dirs.sh INSTALL := @INSTALL@ -INSTALL_HDR := $(INSTALL) -m 444 +INSTALL_HDR := $(INSTALL) -m 644 INSTALL_LIB := $(INSTALL) -m 644 -INSTALL_EXE := $(INSTALL) -m 555 +INSTALL_EXE := $(INSTALL) -m 755 STOW := @stow@ # Tests @@ -194,12 +218,12 @@ $(2)_deps := $$(patsubst %.o, %.d, $$($(2)_objs)) $(2)_deps += $$(patsubst %.o, %.d, $$($(2)_c_objs)) $(2)_deps += $$(patsubst %.h, %.h.d, $$($(2)_precompiled_hdrs)) $$($(2)_pch) : %.h.gch : %.h - $(COMPILE) -x c++-header $$< -o $$@ + $(COMPILE) -x c++-header -c $$< -o $$@ # If using clang, don't depend (and thus don't build) precompiled headers $$($(2)_objs) : %.o : %.cc $$($(2)_gen_hdrs) $(if $(filter-out clang,$(CC)),$$($(2)_pch)) - $(COMPILE) -c $$< + $(COMPILE) $$($(2)_CFLAGS) -c $$< $$($(2)_c_objs) : %.o : %.c $$($(2)_gen_hdrs) - $(COMPILE_C) -c $$< + $(COMPILE_C) $$($(2)_CFLAGS) -c $$< $(2)_junk += $$($(2)_pch) $$($(2)_objs) $$($(2)_c_objs) $$($(2)_deps) \ $$($(2)_gen_hdrs) @@ -213,13 +237,17 @@ $(2)_reverse_deps := $$(call reverse_list,$$($(2)_subproject_deps)) # Build a library for this subproject $(2)_lib_libs := $$($(2)_reverse_deps) 
-$(2)_lib_libnames := $$(patsubst %, lib%.so, $$($(2)_lib_libs)) +$(2)_lib_libnames := $$(patsubst %, lib%.a, $$($(2)_lib_libs)) $(2)_lib_libarg := $$(patsubst %, -l%, $$($(2)_lib_libs)) +$(2)_lib_libnames_shared := $$(if $$($(2)_install_shared_lib),lib$(1).so,) -lib$(1).so : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames) - $(LINK) -shared -o $$@ $(if $(filter Darwin,$(shell uname -s)),-install_name $(install_libs_dir)/$$@) $$^ $$($(2)_lib_libarg) $(LIBS) +lib$(1).a : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames) + $(AR) rcs $$@ $$^ +lib$(1).so : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames_shared) $$($(2)_lib_libnames) + $(LINK) -shared -o $$@ $(if $(filter Darwin,$(shell uname -s)),-install_name $(install_libs_dir)/$$@) $$^ $$($(2)_lib_libnames) $(LIBS) -$(2)_junk += lib$(1).so +$(2)_junk += lib$(1).a +$(2)_junk += $$(if $$($(2)_install_shared_lib),lib$(1).so,) # Build unit tests @@ -228,14 +256,14 @@ $(2)_test_deps := $$(patsubst %.o, %.d, $$($(2)_test_objs)) $(2)_test_exes := $$(patsubst %.t.cc, %-utst, $$($(2)_test_srcs)) $(2)_test_outs := $$(patsubst %, %.out, $$($(2)_test_exes)) $(2)_test_libs := $(1) $$($(2)_reverse_deps) utst -$(2)_test_libnames := $$(patsubst %, lib%.so, $$($(2)_test_libs)) +$(2)_test_libnames := $$(patsubst %, lib%.a, $$($(2)_test_libs)) $(2)_test_libarg := $$(patsubst %, -l%, $$($(2)_test_libs)) $$($(2)_test_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_test_exes) : %-utst : %.t.o $$($(2)_test_libnames) - $(LINK) -o $$@ $$< $$($(2)_test_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_test_libnames) $(LIBS) $(2)_deps += $$($(2)_test_deps) $(2)_junk += \ @@ -255,14 +283,14 @@ $(2)_prog_objs := $$(patsubst %.cc, %.o, $$($(2)_prog_srcs)) $(2)_prog_deps := $$(patsubst %.o, %.d, $$($(2)_prog_objs)) $(2)_prog_exes := $$(patsubst %.cc, %, $$($(2)_prog_srcs)) $(2)_prog_libs := $(1) $$($(2)_reverse_deps) -$(2)_prog_libnames := $$(patsubst %, lib%.so, $$($(2)_prog_libs)) +$(2)_prog_libnames := $$(patsubst %, lib%.a, 
$$($(2)_prog_libs)) $(2)_prog_libarg := $$(patsubst %, -l%, $$($(2)_prog_libs)) $$($(2)_prog_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_prog_deps) $(2)_junk += $$($(2)_prog_objs) $$($(2)_prog_deps) $$($(2)_prog_exes) @@ -277,7 +305,7 @@ $$($(2)_install_prog_objs) : %.o : %.cc $$($(2)_gen_hdrs) $(COMPILE) -c $$< $$($(2)_install_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_install_prog_deps) $(2)_junk += \ @@ -286,7 +314,7 @@ $(2)_junk += \ # Subproject specific targets -all-$(1) : lib$(1).so $$($(2)_install_prog_exes) +all-$(1) : lib$(1).a $$($(2)_install_prog_exes) check-$(1) : $$($(2)_test_outs) echo; grep -h -e'Unit Tests' -e'FAILED' -e'Segementation' $$^; echo @@ -298,7 +326,7 @@ clean-$(1) : # Update running variables -libs += lib$(1).so +libs += lib$(1).a objs += $$($(2)_objs) srcs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_srcs)) hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_hdrs)) $$($(2)_gen_hdrs) @@ -307,10 +335,11 @@ deps += $$($(2)_deps) test_outs += $$($(2)_test_outs) -install_hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_hdrs)) $$($(2)_gen_hdrs) -install_libs += lib$(1).so +install_hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_install_hdrs)) +install_libs += $$(if $$($(2)_install_lib),lib$(1).a,) +install_libs += $$(if $$($(2)_install_shared_lib),lib$(1).so,) install_exes += $$($(2)_install_prog_exes) -install_pcs += riscv-$(1).pc +install_pcs += $$(if $$($(2)_install_lib),riscv-$(1).pc,) endef @@ -353,11 +382,12 @@ check : check-cpp check-bin # Installation #------------------------------------------------------------------------- -install-hdrs : $(install_hdrs) config.h +install-hdrs : $(install_hdrs) $(MKINSTALLDIRS) $(install_hdrs_dir) - for file in $^; \ + for 
file in $(subst $(src_dir)/,,$^); \ do \ - $(INSTALL_HDR) $$file $(install_hdrs_dir); \ + $(MKINSTALLDIRS) $(install_hdrs_dir)/`dirname $$file`; \ + $(INSTALL_HDR) $(src_dir)/$$file $(install_hdrs_dir)/`dirname $$file`; \ done install-libs : $(install_libs) @@ -382,12 +412,6 @@ install-pc : $(install_pcs) done install : install-hdrs install-libs install-exes install-pc -ifeq ($(enable_stow),yes) - $(MKINSTALLDIRS) $(stow_pkg_dir) - cd $(stow_pkg_dir) && \ - $(STOW) --delete $(project_name)-* && \ - $(STOW) $(project_name)-$(project_ver) -endif .PHONY : install install-hdrs install-libs install-exes diff --git a/README.md b/README.md index 018c7d3ead..1cc4c24df3 100644 --- a/README.md +++ b/README.md @@ -5,24 +5,67 @@ About ------------- Spike, the RISC-V ISA Simulator, implements a functional model of one or more -RISC-V processors. - -Spike is named after the golden spike used to celebrate the completion of the -US transcontinental railway. +RISC-V harts. It is named after the golden spike used to celebrate the +completion of the US transcontinental railway. + +Spike supports the following RISC-V ISA features: + - RV32I and RV64I base ISAs, v2.1 + - Zifencei extension, v2.0 + - Zicsr extension, v2.0 + - M extension, v2.0 + - A extension, v2.1 + - F extension, v2.2 + - D extension, v2.2 + - Q extension, v2.2 + - C extension, v2.0 + - V extension, v0.9, w/ Zvlsseg/Zvamo/Zvqmac, w/o Zvediv, (_requires a 64-bit host_) + - Conformance to both RVWMO and RVTSO (Spike is sequentially consistent) + - Machine, Supervisor, and User modes, v1.11 + - Debug v0.14 + +Versioning and APIs +------------------- + +Projects are versioned primarily to indicate when the API has been extended or +rendered incompatible. 
In that spirit, Spike aims to follow the +[SemVer](https://semver.org/spec/v2.0.0.html) versioning scheme, in which +major version numbers are incremented when backwards-incompatible API changes +are made; minor version numbers are incremented when new APIs are added; and +patch version numbers are incremented when bugs are fixed in +a backwards-compatible manner. + +Spike's principal public API is the RISC-V ISA. _The C++ interface to Spike's +internals is **not** considered a public API at this time_, and +backwards-incompatible changes to this interface _will_ be made without +incrementing the major version number. Build Steps --------------- We assume that the RISCV environment variable is set to the RISC-V tools -install path, and that the riscv-fesvr package is installed there. +install path. $ apt-get install device-tree-compiler $ mkdir build $ cd build - $ ../configure --prefix=$RISCV --with-fesvr=$RISCV + $ ../configure --prefix=$RISCV $ make $ [sudo] make install +Build Steps on OpenBSD +---------------------- + +Install bash, gmake, dtc, and use clang. + + $ pkg_add bash gmake dtc + $ exec bash + $ export CC=cc; export CXX=c++ + $ mkdir build + $ cd build + $ ../configure --prefix=$RISCV + $ gmake + $ [doas] gmake install + Compiling and Running a Simple C Program ------------------------------------------- @@ -48,10 +91,11 @@ Adding an instruction to the simulator requires two steps: 2. Add the opcode and opcode mask to riscv/opcodes.h. Alternatively, add it to the riscv-opcodes package, and it will do so for you: - + ``` $ cd ../riscv-opcodes $ vi opcodes // add a line for the new instruction $ make install + ``` 3. Rebuild the simulator. @@ -84,7 +128,7 @@ To see the contents of memory with a virtual address (0 for core 0): : mem 0 2020 -You can advance by one instruction by pressing . You can also +You can advance by one instruction by pressing the enter key.
You can also execute until a desired equality is reached: : until pc 0 2020 (stop when pc=2020) @@ -136,6 +180,7 @@ int main() i++; } +done: while (!wait) ; } @@ -184,8 +229,8 @@ riscv.cpu: target state: halted In yet another shell, start your gdb debug session: ``` tnewsome@compy-vm:~/SiFive/spike-test$ riscv64-unknown-elf-gdb rot13-64 -GNU gdb (GDB) 7.12.50.20170505-git -Copyright (C) 2016 Free Software Foundation, Inc. +GNU gdb (GDB) 8.0.50.20170724-git +Copyright (C) 2017 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" @@ -201,21 +246,22 @@ Type "apropos word" to search for commands related to "word"... Reading symbols from rot13-64...done. (gdb) target remote localhost:3333 Remote debugging using localhost:3333 -0x000000001001000a in main () at rot13.c:8 -8 while (wait) +0x0000000010010004 in main () at rot13.c:8 +8 while (wait) (gdb) print wait $1 = 1 (gdb) print wait=0 $2 = 0 (gdb) print text $3 = "Vafgehpgvba frgf jnag gb or serr!" -(gdb) b 23 -Breakpoint 1 at 0x10010064: file rot13.c, line 23. +(gdb) b done +Breakpoint 1 at 0x10010064: file rot13.c, line 22. (gdb) c Continuing. +Disabling abstract command writes to CSRs. 
Breakpoint 1, main () at rot13.c:23 -23 while (!wait) +23 while (!wait) (gdb) print wait $4 = 0 (gdb) print text diff --git a/VERSION b/VERSION new file mode 100644 index 0000000000..3af5f50aff --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +#define SPIKE_VERSION "1.0.1-dev" diff --git a/aclocal.m4 b/aclocal.m4 index 15353f2c95..def74dbadf 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -59,49 +59,6 @@ AC_DEFUN([MCPPBS_PROG_INSTALL], # Check for install script AC_PROG_INSTALL - - # Deterimine if native build and set prefix appropriately - - AS_IF([ test ${enable_stow} = "yes" ], - [ - AC_CHECK_PROGS([stow],[stow],[no]) - AS_IF([ test ${stow} = "no" ], - [ - AC_MSG_ERROR([Cannot use --enable-stow since stow is not available]) - ]) - - # Check if native or non-native build - - AS_IF([ test "${build}" = "${host}" ], - [ - - # build == host so this is a native build. Make sure --prefix not - # set and $STOW_PREFIX is set, then set prefix=$STOW_PREFIX. - - AS_IF([ test "${prefix}" = "NONE" && test -n "${STOW_PREFIX}" ], - [ - prefix="${STOW_PREFIX}" - AC_MSG_NOTICE([Using \$STOW_PREFIX from environment]) - AC_MSG_NOTICE([prefix=${prefix}]) - ]) - - ],[ - - # build != host so this is a non-native build. Make sure --prefix - # not set and $STOW_ROOT is set, then set - # prefix=$STOW_ROOT/${host_alias}. 
- - AS_IF([ test "${prefix}" = "NONE" && test -n "${STOW_ROOT}" ], - [ - prefix="${STOW_ROOT}/${host_alias}" - AC_MSG_NOTICE([Using \$STOW_ROOT from environment]) - AC_MSG_NOTICE([prefix=${prefix}]) - ]) - - ]) - - ]) - ]) #------------------------------------------------------------------------- diff --git a/ax_append_flag.m4 b/ax_append_flag.m4 new file mode 100644 index 0000000000..dd6d8b6140 --- /dev/null +++ b/ax_append_flag.m4 @@ -0,0 +1,50 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_FLAG(FLAG, [FLAGS-VARIABLE]) +# +# DESCRIPTION +# +# FLAG is appended to the FLAGS-VARIABLE shell variable, with a space +# added in between. +# +# If FLAGS-VARIABLE is not specified, the current language's flags (e.g. +# CFLAGS) is used. FLAGS-VARIABLE is not changed if it already contains +# FLAG. If FLAGS-VARIABLE is unset in the shell, it is set to exactly +# FLAG. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 8 + +AC_DEFUN([AX_APPEND_FLAG], +[dnl +AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_SET_IF +AS_VAR_PUSHDEF([FLAGS], [m4_default($2,_AC_LANG_PREFIX[FLAGS])]) +AS_VAR_SET_IF(FLAGS,[ + AS_CASE([" AS_VAR_GET(FLAGS) "], + [*" $1 "*], [AC_RUN_LOG([: FLAGS already contains $1])], + [ + AS_VAR_APPEND(FLAGS,[" $1"]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) + ], + [ + AS_VAR_SET(FLAGS,[$1]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) +AS_VAR_POPDEF([FLAGS])dnl +])dnl AX_APPEND_FLAG diff --git a/ax_append_link_flags.m4 b/ax_append_link_flags.m4 new file mode 100644 index 0000000000..99b9fa5b4e --- /dev/null +++ b/ax_append_link_flags.m4 @@ -0,0 +1,44 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_link_flags.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_LINK_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# For every FLAG1, FLAG2 it is checked whether the linker works with the +# flag. If it does, the flag is added FLAGS-VARIABLE +# +# If FLAGS-VARIABLE is not specified, the linker's flags (LDFLAGS) is +# used. During the check the flag is always added to the linker's flags. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: This macro depends on the AX_APPEND_FLAG and AX_CHECK_LINK_FLAG. +# Please keep this macro in sync with AX_APPEND_COMPILE_FLAGS. 
+# +# LICENSE +# +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 7 + +AC_DEFUN([AX_APPEND_LINK_FLAGS], +[AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +AX_REQUIRE_DEFINED([AX_APPEND_FLAG]) +for flag in $1; do + AX_CHECK_LINK_FLAG([$flag], [AX_APPEND_FLAG([$flag], [m4_default([$2], [LDFLAGS])])], [], [$3], [$4]) +done +])dnl AX_APPEND_LINK_FLAGS diff --git a/ax_check_link_flag.m4 b/ax_check_link_flag.m4 new file mode 100644 index 0000000000..03a30ce4c7 --- /dev/null +++ b/ax_check_link_flag.m4 @@ -0,0 +1,53 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_link_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_LINK_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the linker or gives an error. +# (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_LINK_IFELSE. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,COMPILE}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. 
Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 6 + +AC_DEFUN([AX_CHECK_LINK_FLAG], +[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_ldflags_$4_$1])dnl +AC_CACHE_CHECK([whether the linker accepts $1], CACHEVAR, [ + ax_check_save_flags=$LDFLAGS + LDFLAGS="$LDFLAGS $4 $1" + AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + LDFLAGS=$ax_check_save_flags]) +AS_VAR_IF(CACHEVAR,yes, + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_LINK_FLAGS diff --git a/ax_require_defined.m4 b/ax_require_defined.m4 new file mode 100644 index 0000000000..17c3eab7da --- /dev/null +++ b/ax_require_defined.m4 @@ -0,0 +1,37 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_require_defined.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_REQUIRE_DEFINED(MACRO) +# +# DESCRIPTION +# +# AX_REQUIRE_DEFINED is a simple helper for making sure other macros have +# been defined and thus are available for use. This avoids random issues +# where a macro isn't expanded. Instead the configure script emits a +# non-fatal: +# +# ./configure: line 1673: AX_CFLAGS_WARN_ALL: command not found +# +# It's like AC_REQUIRE except it doesn't expand the required macro. 
+# +# Here's an example: +# +# AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +# +# LICENSE +# +# Copyright (c) 2014 Mike Frysinger +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 2 + +AC_DEFUN([AX_REQUIRE_DEFINED], [dnl + m4_ifndef([$1], [m4_fatal([macro ]$1[ is not defined; is a m4 file missing?])]) +])dnl AX_REQUIRE_DEFINED diff --git a/ci-tests/test-spike b/ci-tests/test-spike new file mode 100755 index 0000000000..3d5ed6d79a --- /dev/null +++ b/ci-tests/test-spike @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +mkdir build +cd build +mkdir install +$DIR/../configure --prefix=`pwd`/install +make -j4 +make install diff --git a/config.h.in b/config.h.in index 137f195005..f5bbab1325 100644 --- a/config.h.in +++ b/config.h.in @@ -3,21 +3,33 @@ /* Define if building universal (internal helper macro) */ #undef AC_APPLE_UNIVERSAL_BUILD +/* Define if subproject MCPPBS_SPROJ_NORM is enabled */ +#undef CUSTOMEXT_ENABLED + /* Default value for --isa switch */ #undef DEFAULT_ISA -/* Path to the device-tree-compiler */ +/* Default value for --priv switch */ +#undef DEFAULT_PRIV + +/* Default value for --varch switch */ +#undef DEFAULT_VARCH + +/* Executable name of device-tree-compiler */ #undef DTC /* Define if subproject MCPPBS_SPROJ_NORM is enabled */ -#undef DUMMY_ROCC_ENABLED +#undef FDT_ENABLED + +/* Define if subproject MCPPBS_SPROJ_NORM is enabled */ +#undef FESVR_ENABLED + +/* Dynamic library loading is supported */ +#undef HAVE_DLOPEN /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H -/* Define to 1 if you have the `fesvr' library (-lfesvr). */ -#undef HAVE_LIBFESVR - /* Define to 1 if you have the `pthread' library (-lpthread). 
*/ #undef HAVE_LIBPTHREAD diff --git a/configure b/configure index 015f63e965..7b9da7e86d 100755 --- a/configure +++ b/configure @@ -626,7 +626,8 @@ ac_subst_vars='LTLIBOBJS LIBOBJS subprojects_enabled subprojects -stow +HAVE_DLOPEN +HAVE_INT128 INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM @@ -702,7 +703,8 @@ enable_option_checking enable_stow enable_optional_subprojects with_isa -with_fesvr +with_priv +with_varch enable_commitlog enable_histogram enable_dirty @@ -1360,8 +1362,9 @@ Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-isa=RV64IMAFDC Sets the default RISC-V ISA - --with-fesvr path to your fesvr installation if not in a standard - location + --with-priv=MSU Sets the default RISC-V privilege modes supported + --with-varch=vlen:128,elen:64,slen:128 + Sets the default vector config Some influential environment variables: CC C compiler command @@ -1643,6 +1646,60 @@ $as_echo "$ac_res" >&6; } } # ac_fn_cxx_check_header_compile +# ac_fn_cxx_check_type LINENO TYPE VAR INCLUDES +# --------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_cxx_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_cxx_check_type + # ac_fn_cxx_try_link LINENO # ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. @@ -2148,6 +2205,152 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_require_defined.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_REQUIRE_DEFINED(MACRO) +# +# DESCRIPTION +# +# AX_REQUIRE_DEFINED is a simple helper for making sure other macros have +# been defined and thus are available for use. This avoids random issues +# where a macro isn't expanded. Instead the configure script emits a +# non-fatal: +# +# ./configure: line 1673: AX_CFLAGS_WARN_ALL: command not found +# +# It's like AC_REQUIRE except it doesn't expand the required macro. +# +# Here's an example: +# +# AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +# +# LICENSE +# +# Copyright (c) 2014 Mike Frysinger +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 2 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_FLAG(FLAG, [FLAGS-VARIABLE]) +# +# DESCRIPTION +# +# FLAG is appended to the FLAGS-VARIABLE shell variable, with a space +# added in between. +# +# If FLAGS-VARIABLE is not specified, the current language's flags (e.g. +# CFLAGS) is used. FLAGS-VARIABLE is not changed if it already contains +# FLAG. If FLAGS-VARIABLE is unset in the shell, it is set to exactly +# FLAG. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 8 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_link_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_LINK_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the linker or gives an error. +# (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_LINK_IFELSE. 
+# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,COMPILE}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 6 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_link_flags.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_LINK_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# For every FLAG1, FLAG2 it is checked whether the linker works with the +# flag. If it does, the flag is added FLAGS-VARIABLE +# +# If FLAGS-VARIABLE is not specified, the linker's flags (LDFLAGS) is +# used. During the check the flag is always added to the linker's flags. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: This macro depends on the AX_APPEND_FLAG and AX_CHECK_LINK_FLAG. +# Please keep this macro in sync with AX_APPEND_COMPILE_FLAGS. +# +# LICENSE +# +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 7 + + + #------------------------------------------------------------------------- # Checks for programs #------------------------------------------------------------------------- @@ -3428,7 +3631,7 @@ if test x"$DTC" == xno; then : fi cat >>confdefs.h <<_ACEOF -#define DTC "$DTC" +#define DTC "dtc" _ACEOF @@ -4036,7 +4239,8 @@ fi $as_echo "$ac_cv_c_bigendian" >&6; } case $ac_cv_c_bigendian in #( yes) - as_fn_error $? "Spike requires a little-endian host" "$LINENO" 5;; #( + $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h +;; #( no) ;; #( universal) @@ -4173,102 +4377,6 @@ fi - # Deterimine if native build and set prefix appropriately - - if test ${enable_stow} = "yes" ; then : - - for ac_prog in stow -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_stow+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$stow"; then - ac_cv_prog_stow="$stow" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_stow="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -stow=$ac_cv_prog_stow -if test -n "$stow"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $stow" >&5 -$as_echo "$stow" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$stow" && break -done -test -n "$stow" || stow="no" - - if test ${stow} = "no" ; then : - - as_fn_error $? 
"Cannot use --enable-stow since stow is not available" "$LINENO" 5 - -fi - - # Check if native or non-native build - - if test "${build}" = "${host}" ; then : - - - # build == host so this is a native build. Make sure --prefix not - # set and $STOW_PREFIX is set, then set prefix=$STOW_PREFIX. - - if test "${prefix}" = "NONE" && test -n "${STOW_PREFIX}" ; then : - - prefix="${STOW_PREFIX}" - { $as_echo "$as_me:${as_lineno-$LINENO}: Using \$STOW_PREFIX from environment" >&5 -$as_echo "$as_me: Using \$STOW_PREFIX from environment" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: prefix=${prefix}" >&5 -$as_echo "$as_me: prefix=${prefix}" >&6;} - -fi - - -else - - - # build != host so this is a non-native build. Make sure --prefix - # not set and $STOW_ROOT is set, then set - # prefix=$STOW_ROOT/${host_alias}. - - if test "${prefix}" = "NONE" && test -n "${STOW_ROOT}" ; then : - - prefix="${STOW_ROOT}/${host_alias}" - { $as_echo "$as_me:${as_lineno-$LINENO}: Using \$STOW_ROOT from environment" >&5 -$as_echo "$as_me: Using \$STOW_ROOT from environment" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: prefix=${prefix}" >&5 -$as_echo "$as_me: prefix=${prefix}" >&6;} - -fi - - -fi - - -fi - - #------------------------------------------------------------------------- # Checks for header files @@ -4387,13 +4495,96 @@ $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi +#------------------------------------------------------------------------- +# Checks for type +#------------------------------------------------------------------------- + +ac_fn_cxx_check_type "$LINENO" "__int128_t" "ac_cv_type___int128_t" "$ac_includes_default" +if test "x$ac_cv_type___int128_t" = xyes; then : + HAVE_INT128=yes + +fi + + #------------------------------------------------------------------------- # Default compiler flags #------------------------------------------------------------------------- -CFLAGS="-Wall -Wno-unused -g -O2" -CXXFLAGS="-Wall -Wno-unused -g -O2 -std=c++11" + + + +for flag in 
-Wl,--export-dynamic; do + as_CACHEVAR=`$as_echo "ax_cv_check_ldflags__$flag" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the linker accepts $flag" >&5 +$as_echo_n "checking whether the linker accepts $flag... " >&6; } +if eval \${$as_CACHEVAR+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$LDFLAGS + LDFLAGS="$LDFLAGS $flag" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + eval "$as_CACHEVAR=yes" +else + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes"; then : + +if ${LDFLAGS+:} false; then : + + case " $LDFLAGS " in #( + *" $flag "*) : + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS already contains \$flag"; } >&5 + (: LDFLAGS already contains $flag) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append LDFLAGS " $flag" + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + ;; +esac + +else + + LDFLAGS=$flag + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + +fi + +else + : +fi + +done #------------------------------------------------------------------------- @@ -4438,6 +4629,98 @@ fi + # Add subproject to our running list + + subprojects="$subprojects fesvr" + + # Process the subproject appropriately. 
If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. + + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : fesvr" >&5 +$as_echo "$as_me: configuring default subproject : fesvr" >&6;} + ac_config_files="$ac_config_files fesvr.mk:fesvr/fesvr.mk.in" + + enable_fesvr_sproj="yes" + subprojects_enabled="$subprojects_enabled fesvr" + +$as_echo "#define FESVR_ENABLED /**/" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPTHREAD 1 +_ACEOF + + LIBS="-lpthread $LIBS" + +else + as_fn_error $? 
"libpthread is required" "$LINENO" 5 +fi + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + # Add subproject to our running list subprojects="$subprojects riscv" @@ -4481,6 +4764,41 @@ _ACEOF fi + +# Check whether --with-priv was given. +if test "${with_priv+set}" = set; then : + withval=$with_priv; +cat >>confdefs.h <<_ACEOF +#define DEFAULT_PRIV "$withval" +_ACEOF + +else + +cat >>confdefs.h <<_ACEOF +#define DEFAULT_PRIV "MSU" +_ACEOF + +fi + + + +# Check whether --with-varch was given. +if test "${with_varch+set}" = set; then : + withval=$with_varch; +cat >>confdefs.h <<_ACEOF +#define DEFAULT_VARCH "$withval" +_ACEOF + +else + +cat >>confdefs.h <<_ACEOF +#define DEFAULT_VARCH "vlen:128,elen:64,slen:128" +_ACEOF + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing dlopen" >&5 $as_echo_n "checking for library containing dlopen... " >&6; } if ${ac_cv_search_dlopen+:} false; then : @@ -4535,69 +4853,12 @@ ac_res=$ac_cv_search_dlopen if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" -else - - as_fn_error $? "unable to find the dlopen() function" "$LINENO" 5 - -fi - - -# Check whether --with-fesvr was given. -if test "${with_fesvr+set}" = set; then : - withval=$with_fesvr; - LDFLAGS="-L$withval/lib $LDFLAGS" - CPPFLAGS="-I$withval/include $CPPFLAGS" - - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libfesvr_is_present in -lfesvr" >&5 -$as_echo_n "checking for libfesvr_is_present in -lfesvr... " >&6; } -if ${ac_cv_lib_fesvr_libfesvr_is_present+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lfesvr -pthread $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char libfesvr_is_present (); -int -main () -{ -return libfesvr_is_present (); - ; - return 0; -} -_ACEOF -if ac_fn_cxx_try_link "$LINENO"; then : - ac_cv_lib_fesvr_libfesvr_is_present=yes -else - ac_cv_lib_fesvr_libfesvr_is_present=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_fesvr_libfesvr_is_present" >&5 -$as_echo "$ac_cv_lib_fesvr_libfesvr_is_present" >&6; } -if test "x$ac_cv_lib_fesvr_libfesvr_is_present" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBFESVR 1 -_ACEOF +$as_echo "#define HAVE_DLOPEN /**/" >>confdefs.h +, + HAVE_DLOPEN=yes - LIBS="-lfesvr $LIBS" -else - as_fn_error $? "libfesvr is required" "$LINENO" 5 fi @@ -4728,7 +4989,7 @@ fi # Add subproject to our running list - subprojects="$subprojects dummy_rocc" + subprojects="$subprojects disasm" # Process the subproject appropriately. If enabled add it to the # $enabled_subprojects running shell variable, set a @@ -4736,14 +4997,104 @@ fi # 'subproject.ac'. 
- { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : dummy_rocc" >&5 -$as_echo "$as_me: configuring default subproject : dummy_rocc" >&6;} - ac_config_files="$ac_config_files dummy_rocc.mk:dummy_rocc/dummy_rocc.mk.in" + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : disasm" >&5 +$as_echo "$as_me: configuring default subproject : disasm" >&6;} + ac_config_files="$ac_config_files disasm.mk:disasm/disasm.mk.in" - enable_dummy_rocc_sproj="yes" - subprojects_enabled="$subprojects_enabled dummy_rocc" + enable_disasm_sproj="yes" + subprojects_enabled="$subprojects_enabled disasm" -$as_echo "#define DUMMY_ROCC_ENABLED /**/" >>confdefs.h +$as_echo "#define DISASM_ENABLED /**/" >>confdefs.h + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects customext" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. 
+ + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : customext" >&5 +$as_echo "$as_me: configuring default subproject : customext" >&6;} + ac_config_files="$ac_config_files customext.mk:customext/customext.mk.in" + + enable_customext_sproj="yes" + subprojects_enabled="$subprojects_enabled customext" + +$as_echo "#define CUSTOMEXT_ENABLED /**/" >>confdefs.h + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects fdt" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. + + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : fdt" >&5 +$as_echo "$as_me: configuring default subproject : fdt" >&6;} + ac_config_files="$ac_config_files fdt.mk:fdt/fdt.mk.in" + + enable_fdt_sproj="yes" + subprojects_enabled="$subprojects_enabled fdt" + +$as_echo "#define FDT_ENABLED /**/" >>confdefs.h @@ -4840,6 +5191,51 @@ $as_echo "#define SPIKE_MAIN_ENABLED /**/" >>confdefs.h + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects spike_dasm" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. 
+ + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : spike_dasm" >&5 +$as_echo "$as_me: configuring default subproject : spike_dasm" >&6;} + ac_config_files="$ac_config_files spike_dasm.mk:spike_dasm/spike_dasm.mk.in" + + enable_spike_dasm_sproj="yes" + subprojects_enabled="$subprojects_enabled spike_dasm" + +$as_echo "#define SPIKE_DASM_ENABLED /**/" >>confdefs.h + + + + + + # Output make variables @@ -4866,15 +5262,9 @@ ac_config_headers="$ac_config_headers config.h" ac_config_files="$ac_config_files Makefile" -ac_config_files="$ac_config_files riscv-spike.pc" - -ac_config_files="$ac_config_files riscv-riscv.pc" - -ac_config_files="$ac_config_files riscv-softfloat.pc" - -ac_config_files="$ac_config_files riscv-dummy_rocc.pc" +ac_config_files="$ac_config_files riscv-fesvr.pc" -ac_config_files="$ac_config_files riscv-spike_main.pc" +ac_config_files="$ac_config_files riscv-disasm.pc" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -5567,17 +5957,18 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in + "fesvr.mk") CONFIG_FILES="$CONFIG_FILES fesvr.mk:fesvr/fesvr.mk.in" ;; "riscv.mk") CONFIG_FILES="$CONFIG_FILES riscv.mk:riscv/riscv.mk.in" ;; - "dummy_rocc.mk") CONFIG_FILES="$CONFIG_FILES dummy_rocc.mk:dummy_rocc/dummy_rocc.mk.in" ;; + "disasm.mk") CONFIG_FILES="$CONFIG_FILES disasm.mk:disasm/disasm.mk.in" ;; + "customext.mk") CONFIG_FILES="$CONFIG_FILES customext.mk:customext/customext.mk.in" ;; + "fdt.mk") CONFIG_FILES="$CONFIG_FILES fdt.mk:fdt/fdt.mk.in" ;; "softfloat.mk") CONFIG_FILES="$CONFIG_FILES softfloat.mk:softfloat/softfloat.mk.in" ;; "spike_main.mk") CONFIG_FILES="$CONFIG_FILES spike_main.mk:spike_main/spike_main.mk.in" ;; + "spike_dasm.mk") CONFIG_FILES="$CONFIG_FILES spike_dasm.mk:spike_dasm/spike_dasm.mk.in" ;; "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES 
Makefile" ;; - "riscv-spike.pc") CONFIG_FILES="$CONFIG_FILES riscv-spike.pc" ;; - "riscv-riscv.pc") CONFIG_FILES="$CONFIG_FILES riscv-riscv.pc" ;; - "riscv-softfloat.pc") CONFIG_FILES="$CONFIG_FILES riscv-softfloat.pc" ;; - "riscv-dummy_rocc.pc") CONFIG_FILES="$CONFIG_FILES riscv-dummy_rocc.pc" ;; - "riscv-spike_main.pc") CONFIG_FILES="$CONFIG_FILES riscv-spike_main.pc" ;; + "riscv-fesvr.pc") CONFIG_FILES="$CONFIG_FILES riscv-fesvr.pc" ;; + "riscv-disasm.pc") CONFIG_FILES="$CONFIG_FILES riscv-disasm.pc" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac diff --git a/configure.ac b/configure.ac index e361877da2..b7788b4ea9 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,11 @@ AC_CONFIG_AUX_DIR([scripts]) AC_CANONICAL_BUILD AC_CANONICAL_HOST +m4_include(ax_require_defined.m4) +m4_include(ax_append_flag.m4) +m4_include(ax_check_link_flag.m4) +m4_include(ax_append_link_flags.m4) + #------------------------------------------------------------------------- # Checks for programs #------------------------------------------------------------------------- @@ -53,9 +58,9 @@ AC_CHECK_TOOL([AR],[ar]) AC_CHECK_TOOL([RANLIB],[ranlib]) AC_PATH_PROG([DTC],[dtc],[no]) AS_IF([test x"$DTC" == xno],AC_MSG_ERROR([device-tree-compiler not found])) -AC_DEFINE_UNQUOTED(DTC, ["$DTC"], [Path to the device-tree-compiler]) +AC_DEFINE_UNQUOTED(DTC, ["dtc"], [Executable name of device-tree-compiler]) -AC_C_BIGENDIAN(AC_MSG_ERROR([Spike requires a little-endian host])) +AC_C_BIGENDIAN #------------------------------------------------------------------------- # MCPPBS specific program checks @@ -72,12 +77,17 @@ MCPPBS_PROG_INSTALL AC_HEADER_STDC +#------------------------------------------------------------------------- +# Checks for type +#------------------------------------------------------------------------- + +AC_CHECK_TYPE([__int128_t], AC_SUBST([HAVE_INT128],[yes])) + #------------------------------------------------------------------------- # 
Default compiler flags #------------------------------------------------------------------------- -AC_SUBST([CFLAGS], ["-Wall -Wno-unused -g -O2"]) -AC_SUBST([CXXFLAGS],["-Wall -Wno-unused -g -O2 -std=c++11"]) +AX_APPEND_LINK_FLAGS([-Wl,--export-dynamic]) #------------------------------------------------------------------------- # MCPPBS subproject list @@ -86,7 +96,7 @@ AC_SUBST([CXXFLAGS],["-Wall -Wno-unused -g -O2 -std=c++11"]) # The '*' suffix indicates an optional subproject. The '**' suffix # indicates an optional subproject which is also the name of a group. -MCPPBS_SUBPROJECTS([ riscv, dummy_rocc, softfloat, spike_main ]) +MCPPBS_SUBPROJECTS([ fesvr, riscv, disasm, customext, fdt, softfloat, spike_main, spike_dasm ]) #------------------------------------------------------------------------- # MCPPBS subproject groups @@ -105,9 +115,6 @@ MCPPBS_SUBPROJECTS([ riscv, dummy_rocc, softfloat, spike_main ]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([Makefile]) -AC_CONFIG_FILES([riscv-spike.pc]) -AC_CONFIG_FILES([riscv-riscv.pc]) -AC_CONFIG_FILES([riscv-softfloat.pc]) -AC_CONFIG_FILES([riscv-dummy_rocc.pc]) -AC_CONFIG_FILES([riscv-spike_main.pc]) +AC_CONFIG_FILES([riscv-fesvr.pc]) +AC_CONFIG_FILES([riscv-disasm.pc]) AC_OUTPUT diff --git a/customext/cflush.cc b/customext/cflush.cc new file mode 100644 index 0000000000..dedcc03763 --- /dev/null +++ b/customext/cflush.cc @@ -0,0 +1,41 @@ +#include "extension.h" +#include + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs1()]; + } +} xrs1; + +static reg_t custom_cflush(processor_t* p, insn_t insn, reg_t pc) +{ + require_privilege(PRV_M); + + return pc + 4; \ +} + +class cflush_t : public extension_t +{ + public: + const char* name() { return "cflush"; } + + cflush_t() {} + + std::vector get_instructions() { + std::vector insns; + insns.push_back((insn_desc_t){0xFC000073, 0xFFF07FFF, custom_cflush, custom_cflush}); + insns.push_back((insn_desc_t){0xFC200073, 0xFFF07FFF, 
custom_cflush, custom_cflush}); + insns.push_back((insn_desc_t){0xFC100073, 0xFFF07FFF, custom_cflush, custom_cflush}); + return insns; + } + + std::vector get_disasms() { + std::vector insns; + insns.push_back(new disasm_insn_t("cflush.d.l1", 0xFC000073, 0xFFF07FFF, {&xrs1})); + insns.push_back(new disasm_insn_t("cdiscard.d.l1", 0xFC200073, 0xFFF07FFF, {&xrs1})); + insns.push_back(new disasm_insn_t("cflush.i.l1", 0xFC100073, 0xFFF07FFF, {&xrs1})); + return insns; + } +}; + +REGISTER_EXTENSION(cflush, []() { return new cflush_t; }) diff --git a/dummy_rocc/dummy_rocc.ac b/customext/customext.ac similarity index 100% rename from dummy_rocc/dummy_rocc.ac rename to customext/customext.ac diff --git a/customext/customext.mk.in b/customext/customext.mk.in new file mode 100644 index 0000000000..0dd725ef2c --- /dev/null +++ b/customext/customext.mk.in @@ -0,0 +1,12 @@ +customext_subproject_deps = \ + spike_main \ + riscv \ + softfloat \ + +customext_srcs = \ + dummy_rocc.cc \ + cflush.cc \ + +customext_CFLAGS = -fPIC + +customext_install_shared_lib = yes diff --git a/dummy_rocc/dummy_rocc.cc b/customext/dummy_rocc.cc similarity index 100% rename from dummy_rocc/dummy_rocc.cc rename to customext/dummy_rocc.cc diff --git a/dummy_rocc/dummy_rocc_test.c b/customext/dummy_rocc_test.c similarity index 100% rename from dummy_rocc/dummy_rocc_test.c rename to customext/dummy_rocc_test.c diff --git a/debug_rom/debug_rom.S b/debug_rom/debug_rom.S index 28c7076fda..8d8e4cd037 100755 --- a/debug_rom/debug_rom.S +++ b/debug_rom/debug_rom.S @@ -14,6 +14,7 @@ entry: jal zero, _entry resume: + // Not used. jal zero, _resume exception: jal zero, _exception @@ -37,16 +38,22 @@ entry_loop: csrr s0, CSR_MHARTID lbu s0, DEBUG_ROM_FLAGS(s0) // multiple harts can resume here andi s0, s0, (1 << DEBUG_ROM_FLAG_RESUME) - bnez s0, resume + bnez s0, _resume + wfi jal zero, entry_loop _exception: + // Restore S0, which we always save to dscratch. 
+ // We need this in case the user tried an abstract write to a + // non-existent CSR. + csrr s0, CSR_DSCRATCH sw zero, DEBUG_ROM_EXCEPTION(zero) // Let debug module know you got an exception. ebreak going: + csrr s0, CSR_MHARTID + sw s0, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset. csrr s0, CSR_DSCRATCH // Restore s0 here - sw zero, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset. fence fence.i jalr zero, zero, %lo(whereto) // Debug module will put different instructions and data in the RAM, diff --git a/debug_rom/debug_rom.h b/debug_rom/debug_rom.h index d21e1669c7..7edd5f68f9 100644 --- a/debug_rom/debug_rom.h +++ b/debug_rom/debug_rom.h @@ -1,12 +1,13 @@ static const unsigned char debug_rom_raw[] = { - 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x40, 0x05, 0x6f, 0x00, 0x40, 0x03, + 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x00, 0x06, 0x6f, 0x00, 0x80, 0x03, 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x13, 0x74, 0x14, 0x00, - 0x63, 0x10, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, - 0x13, 0x74, 0x24, 0x00, 0xe3, 0x18, 0x04, 0xfc, 0x6f, 0xf0, 0xdf, 0xfd, - 0x23, 0x26, 0x00, 0x10, 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x20, 0x7b, - 0x23, 0x22, 0x00, 0x10, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, + 0x63, 0x14, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, + 0x13, 0x74, 0x24, 0x00, 0x63, 0x18, 0x04, 0x02, 0x73, 0x00, 0x50, 0x10, + 0x6f, 0xf0, 0x9f, 0xfd, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10, + 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x22, 0x80, 0x10, + 0x73, 0x24, 0x20, 0x7b, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, 0x67, 0x00, 0x00, 0x30, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b }; -static const unsigned int debug_rom_raw_len = 104; +static const unsigned int debug_rom_raw_len = 116; diff --git a/disasm/disasm.ac 
b/disasm/disasm.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/disasm/disasm.cc b/disasm/disasm.cc new file mode 100644 index 0000000000..fbb889775a --- /dev/null +++ b/disasm/disasm.cc @@ -0,0 +1,1553 @@ +// See LICENSE for license details. + +#include "disasm.h" +#include +#include +#include +#include +#include +#include + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string("(") + xpr_name[insn.rs1()] + ')'; + } +} amo_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rd()]; + } +} xrd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs1()]; + } +} xrs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs2()]; + } +} xrs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rd()]; + } +} frd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs1()]; + } +} frs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs2()]; + } +} frs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs3()]; + } +} frs3; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + switch (insn.csr()) + { + #define DECLARE_CSR(name, num) case num: return #name; + #include "encoding.h" + #undef DECLARE_CSR + default: + { + char buf[16]; + snprintf(buf, sizeof buf, "unknown_%03" PRIx64, insn.csr()); + return std::string(buf); + } + } + } +} csr; + +struct : 
public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()); + } +} imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.shamt()); + } +} shamt; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + s << std::hex << "0x" << ((uint32_t)insn.u_imm() >> 12); + return s.str(); + } +} bigimm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string(insn.rs1()); + } +} zimm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.sb_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} branch_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.uj_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << std::hex << " 0x" << abs(target); + return s.str(); + } +} jump_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs1()]; + } +} rvc_rs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs2()]; + } +} rvc_rs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rvc_rs2()]; + } +} rvc_fp_rs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs1s()]; + } +} rvc_rs1s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs2s()]; + } +} rvc_rs2s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rvc_rs2s()]; + } +} rvc_fp_rs2s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[X_SP]; + } +} rvc_sp; + +struct : public arg_t { + std::string to_string(insn_t insn) 
const { + return std::to_string((int)insn.rvc_imm()); + } +} rvc_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_addi4spn_imm()); + } +} rvc_addi4spn_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_addi16sp_imm()); + } +} rvc_addi16sp_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lwsp_imm()); + } +} rvc_lwsp_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)(insn.rvc_imm() & 0x3f)); + } +} rvc_shamt; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + s << std::hex << "0x" << ((uint32_t)insn.rvc_imm() << 12 >> 12); + return s.str(); + } +} rvc_uimm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lwsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_lwsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_ldsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_ldsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_swsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_swsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_sdsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_sdsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lw_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; + } +} rvc_lw_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_ld_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; + } +} rvc_ld_address; + +struct : public arg_t { + std::string to_string(insn_t insn) 
const { + std::stringstream s; + int32_t target = insn.rvc_b_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} rvc_branch_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.rvc_j_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} rvc_jump_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string("(") + xpr_name[insn.rs1()] + ')'; + } +} v_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rd()]; + } +} vd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rs1()]; + } +} vs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rs2()]; + } +} vs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rd()]; + } +} vs3; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return insn.v_vm() ? "" : "v0.t"; + } +} vm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return "v0"; + } +} v0; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.v_simm5()); + } +} v_simm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int sew = insn.v_sew(); + int lmul = insn.v_lmul(); + auto vta = insn.v_vta() == 1 ? "ta" : "tu"; + auto vma = insn.v_vma() == 1 ? 
"ma" : "mu"; + s << "e" << sew; + if(insn.v_frac_lmul()) { + std::string lmul_str = ""; + switch(lmul){ + case 3: + lmul_str = "f2"; + break; + case 2: + lmul_str = "f4"; + break; + case 1: + lmul_str = "f8"; + break; + default: + assert(true && "unsupport fractional LMUL"); + } + s << ", m" << lmul_str; + } else { + s << ", m" << (1 << lmul); + } + s << ", " << vta << ", " << vma; + return s.str(); + } +} v_vtype; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return "x0"; + } +} x0; + +// Xpulpimg + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm5()); + } +} p_zimm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm5()); + } +} p_simm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm6()); + } +} p_zimm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm6()); + } +} p_simm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address_rr; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_rrpost; + +struct : public 
arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address_rr; + + +typedef struct { + reg_t match; + reg_t mask; + const char *fmt; + std::vector& arg; +} custom_fmt_t; + +std::string disassembler_t::disassemble(insn_t insn) const +{ + const disasm_insn_t* disasm_insn = lookup(insn); + return disasm_insn ? disasm_insn->to_string(insn) : "unknown"; +} + +disassembler_t::disassembler_t(int xlen) +{ + const uint32_t mask_rd = 0x1fUL << 7; + const uint32_t match_rd_ra = 1UL << 7; + const uint32_t mask_rs1 = 0x1fUL << 15; + const uint32_t match_rs1_ra = 1UL << 15; + const uint32_t mask_rs2 = 0x1fUL << 20; + const uint32_t mask_imm = 0xfffUL << 20; + const uint32_t match_imm_1 = 1UL << 20; + const uint32_t mask_rvc_rs2 = 0x1fUL << 2; + const uint32_t mask_rvc_imm = mask_rvc_rs2 | 0x1000UL; + const uint32_t mask_nf = 0x7Ul << 29; + const uint32_t mask_wd = 0x1Ul << 26; + const uint32_t mask_vm = 0x1Ul << 25; + const uint32_t mask_vldst = 0x7Ul << 12 | 0x1UL << 28; + const uint32_t mask_amoop = 0x1fUl << 27; + const uint32_t mask_width = 0x7Ul << 12; + + #define DECLARE_INSN(code, match, mask) \ + const uint32_t match_##code = match; \ + const uint32_t mask_##code = mask; + #include "encoding.h" + #undef DECLARE_INSN + + // explicit per-instruction disassembly + #define DISASM_INSN(name, code, extra, ...) 
\ + add_insn(new disasm_insn_t(name, match_##code, mask_##code | (extra), __VA_ARGS__)); + #define DEFINE_NOARG(code) \ + add_insn(new disasm_insn_t(#code, match_##code, mask_##code, {})); + #define DEFINE_RTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2}) + #define DEFINE_R1TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1}) + #define DEFINE_ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &imm}) + #define DEFINE_ITYPE_SHIFT(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &shamt}) + #define DEFINE_I0TYPE(name, code) DISASM_INSN(name, code, mask_rs1, {&xrd, &imm}) + #define DEFINE_I1TYPE(name, code) DISASM_INSN(name, code, mask_imm, {&xrd, &xrs1}) + #define DEFINE_I2TYPE(name, code) DISASM_INSN(name, code, mask_rd | mask_imm, {&xrs1}) + #define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm}) + #define DEFINE_BTYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2, &branch_target}) + #define DEFINE_B0TYPE(name, code) DISASM_INSN(name, code, mask_rs1 | mask_rs2, {&branch_target}) + #define DEFINE_B1TYPE(name, code) DISASM_INSN(name, code, mask_rs2, {&xrs1, &branch_target}) + #define DEFINE_XLOAD(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address}) + #define DEFINE_XSTORE(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address}) + #define DEFINE_XAMO(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs2, &amo_address}) + #define DEFINE_XAMO_LR(code) DISASM_INSN(#code, code, 0, {&xrd, &amo_address}) + #define DEFINE_FLOAD(code) DISASM_INSN(#code, code, 0, {&frd, &load_address}) + #define DEFINE_FSTORE(code) DISASM_INSN(#code, code, 0, {&frs2, &store_address}) + #define DEFINE_FRTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2}) + #define DEFINE_FR1TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1}) + #define DEFINE_FR3TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2, &frs3}) + #define DEFINE_FXTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1}) + #define DEFINE_FX2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1, 
&frs2}) + #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &xrs1}) + #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) + // Xpulpimg + #define DEFINE_PLOAD_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_irpost}) + #define DEFINE_PLOAD_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rrpost}) + #define DEFINE_PLOAD_RR(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rr}) + #define DEFINE_PSTORE_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_irpost}) + #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) + #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) + #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) + #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) + #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) + + DEFINE_XLOAD(lb) + DEFINE_XLOAD(lbu) + DEFINE_XLOAD(lh) + DEFINE_XLOAD(lhu) + DEFINE_XLOAD(lw) + DEFINE_XLOAD(lwu) + DEFINE_XLOAD(ld) + + DEFINE_XSTORE(sb) + DEFINE_XSTORE(sh) + DEFINE_XSTORE(sw) + DEFINE_XSTORE(sd) + + DEFINE_XAMO(amoadd_w) + DEFINE_XAMO(amoswap_w) + DEFINE_XAMO(amoand_w) + DEFINE_XAMO(amoor_w) + DEFINE_XAMO(amoxor_w) + DEFINE_XAMO(amomin_w) + DEFINE_XAMO(amomax_w) + DEFINE_XAMO(amominu_w) + DEFINE_XAMO(amomaxu_w) + DEFINE_XAMO(amoadd_d) + DEFINE_XAMO(amoswap_d) + DEFINE_XAMO(amoand_d) + DEFINE_XAMO(amoor_d) + DEFINE_XAMO(amoxor_d) + DEFINE_XAMO(amomin_d) + DEFINE_XAMO(amomax_d) + DEFINE_XAMO(amominu_d) + DEFINE_XAMO(amomaxu_d) + + DEFINE_XAMO_LR(lr_w) + DEFINE_XAMO(sc_w) + DEFINE_XAMO_LR(lr_d) + DEFINE_XAMO(sc_d) + + DEFINE_FLOAD(flw) + DEFINE_FLOAD(fld) + DEFINE_FLOAD(flh) + DEFINE_FLOAD(flq) + + DEFINE_FSTORE(fsw) + DEFINE_FSTORE(fsd) + DEFINE_FSTORE(fsh) + DEFINE_FSTORE(fsq) + 
+ add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); + add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); + add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); + + DEFINE_B1TYPE("beqz", beq); + DEFINE_B1TYPE("bnez", bne); + DEFINE_B1TYPE("bltz", blt); + DEFINE_B1TYPE("bgez", bge); + DEFINE_BTYPE(beq) + DEFINE_BTYPE(bne) + DEFINE_BTYPE(blt) + DEFINE_BTYPE(bge) + DEFINE_BTYPE(bltu) + DEFINE_BTYPE(bgeu) + + DEFINE_LTYPE(lui); + DEFINE_LTYPE(auipc); + + add_insn(new disasm_insn_t("ret", match_jalr | match_rs1_ra, mask_jalr | mask_rd | mask_rs1 | mask_imm, {})); + DEFINE_I2TYPE("jr", jalr); + add_insn(new disasm_insn_t("jalr", match_jalr | match_rd_ra, mask_jalr | mask_rd | mask_imm, {&xrs1})); + DEFINE_ITYPE(jalr); + + add_insn(new disasm_insn_t("nop", match_addi, mask_addi | mask_rd | mask_rs1 | mask_imm, {})); + DEFINE_I0TYPE("li", addi); + DEFINE_I1TYPE("mv", addi); + DEFINE_ITYPE(addi); + DEFINE_ITYPE(slti); + add_insn(new disasm_insn_t("seqz", match_sltiu | match_imm_1, mask_sltiu | mask_imm, {&xrd, &xrs1})); + DEFINE_ITYPE(sltiu); + add_insn(new disasm_insn_t("not", match_xori | mask_imm, mask_xori | mask_imm, {&xrd, &xrs1})); + DEFINE_ITYPE(xori); + + DEFINE_ITYPE_SHIFT(slli); + DEFINE_ITYPE_SHIFT(srli); + DEFINE_ITYPE_SHIFT(srai); + + DEFINE_ITYPE(ori); + DEFINE_ITYPE(andi); + DEFINE_I1TYPE("sext.w", addiw); + DEFINE_ITYPE(addiw); + + DEFINE_ITYPE_SHIFT(slliw); + DEFINE_ITYPE_SHIFT(srliw); + DEFINE_ITYPE_SHIFT(sraiw); + + DEFINE_RTYPE(add); + DEFINE_RTYPE(sub); + DEFINE_RTYPE(sll); + DEFINE_RTYPE(slt); + add_insn(new disasm_insn_t("snez", match_sltu, mask_sltu | mask_rs1, {&xrd, &xrs2})); + DEFINE_RTYPE(sltu); + DEFINE_RTYPE(xor); + DEFINE_RTYPE(srl); + DEFINE_RTYPE(sra); + DEFINE_RTYPE(or); + DEFINE_RTYPE(and); + DEFINE_RTYPE(mul); + DEFINE_RTYPE(mulh); + DEFINE_RTYPE(mulhu); + DEFINE_RTYPE(mulhsu); + DEFINE_RTYPE(div); + DEFINE_RTYPE(divu); + 
DEFINE_RTYPE(rem); + DEFINE_RTYPE(remu); + DEFINE_RTYPE(addw); + DEFINE_RTYPE(subw); + DEFINE_RTYPE(sllw); + DEFINE_RTYPE(srlw); + DEFINE_RTYPE(sraw); + DEFINE_RTYPE(mulw); + DEFINE_RTYPE(divw); + DEFINE_RTYPE(divuw); + DEFINE_RTYPE(remw); + DEFINE_RTYPE(remuw); + + DEFINE_NOARG(ecall); + DEFINE_NOARG(ebreak); + DEFINE_NOARG(uret); + DEFINE_NOARG(sret); + DEFINE_NOARG(mret); + DEFINE_NOARG(dret); + DEFINE_NOARG(wfi); + DEFINE_NOARG(fence); + DEFINE_NOARG(fence_i); + DEFINE_SFENCE_TYPE(sfence_vma); + + add_insn(new disasm_insn_t("csrr", match_csrrs, mask_csrrs | mask_rs1, {&xrd, &csr})); + add_insn(new disasm_insn_t("csrw", match_csrrw, mask_csrrw | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrs", match_csrrs, mask_csrrs | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrc", match_csrrc, mask_csrrc | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrwi", match_csrrwi, mask_csrrwi | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrsi", match_csrrsi, mask_csrrsi | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrci", match_csrrci, mask_csrrci | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrrw", match_csrrw, mask_csrrw, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrs", match_csrrs, mask_csrrs, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrc", match_csrrc, mask_csrrc, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrwi", match_csrrwi, mask_csrrwi, {&xrd, &csr, &zimm5})); + add_insn(new disasm_insn_t("csrrsi", match_csrrsi, mask_csrrsi, {&xrd, &csr, &zimm5})); + add_insn(new disasm_insn_t("csrrci", match_csrrci, mask_csrrci, {&xrd, &csr, &zimm5})); + + DEFINE_FRTYPE(fadd_s); + DEFINE_FRTYPE(fsub_s); + DEFINE_FRTYPE(fmul_s); + DEFINE_FRTYPE(fdiv_s); + DEFINE_FR1TYPE(fsqrt_s); + DEFINE_FRTYPE(fmin_s); + DEFINE_FRTYPE(fmax_s); + DEFINE_FR3TYPE(fmadd_s); + DEFINE_FR3TYPE(fmsub_s); + DEFINE_FR3TYPE(fnmadd_s); + DEFINE_FR3TYPE(fnmsub_s); + DEFINE_FRTYPE(fsgnj_s); + 
DEFINE_FRTYPE(fsgnjn_s); + DEFINE_FRTYPE(fsgnjx_s); + DEFINE_FR1TYPE(fcvt_s_d); + DEFINE_FR1TYPE(fcvt_s_q); + DEFINE_XFTYPE(fcvt_s_l); + DEFINE_XFTYPE(fcvt_s_lu); + DEFINE_XFTYPE(fcvt_s_w); + DEFINE_XFTYPE(fcvt_s_wu); + DEFINE_XFTYPE(fcvt_s_wu); + DEFINE_XFTYPE(fmv_w_x); + DEFINE_FXTYPE(fcvt_l_s); + DEFINE_FXTYPE(fcvt_lu_s); + DEFINE_FXTYPE(fcvt_w_s); + DEFINE_FXTYPE(fcvt_wu_s); + DEFINE_FXTYPE(fclass_s); + DEFINE_FXTYPE(fmv_x_w); + DEFINE_FX2TYPE(feq_s); + DEFINE_FX2TYPE(flt_s); + DEFINE_FX2TYPE(fle_s); + + DEFINE_FRTYPE(fadd_d); + DEFINE_FRTYPE(fsub_d); + DEFINE_FRTYPE(fmul_d); + DEFINE_FRTYPE(fdiv_d); + DEFINE_FR1TYPE(fsqrt_d); + DEFINE_FRTYPE(fmin_d); + DEFINE_FRTYPE(fmax_d); + DEFINE_FR3TYPE(fmadd_d); + DEFINE_FR3TYPE(fmsub_d); + DEFINE_FR3TYPE(fnmadd_d); + DEFINE_FR3TYPE(fnmsub_d); + DEFINE_FRTYPE(fsgnj_d); + DEFINE_FRTYPE(fsgnjn_d); + DEFINE_FRTYPE(fsgnjx_d); + DEFINE_FR1TYPE(fcvt_d_s); + DEFINE_FR1TYPE(fcvt_d_q); + DEFINE_XFTYPE(fcvt_d_l); + DEFINE_XFTYPE(fcvt_d_lu); + DEFINE_XFTYPE(fcvt_d_w); + DEFINE_XFTYPE(fcvt_d_wu); + DEFINE_XFTYPE(fcvt_d_wu); + DEFINE_XFTYPE(fmv_d_x); + DEFINE_FXTYPE(fcvt_l_d); + DEFINE_FXTYPE(fcvt_lu_d); + DEFINE_FXTYPE(fcvt_w_d); + DEFINE_FXTYPE(fcvt_wu_d); + DEFINE_FXTYPE(fclass_d); + DEFINE_FXTYPE(fmv_x_d); + DEFINE_FX2TYPE(feq_d); + DEFINE_FX2TYPE(flt_d); + DEFINE_FX2TYPE(fle_d); + + DEFINE_FRTYPE(fadd_h); + DEFINE_FRTYPE(fsub_h); + DEFINE_FRTYPE(fmul_h); + DEFINE_FRTYPE(fdiv_h); + DEFINE_FR1TYPE(fsqrt_h); + DEFINE_FRTYPE(fmin_h); + DEFINE_FRTYPE(fmax_h); + DEFINE_FR3TYPE(fmadd_h); + DEFINE_FR3TYPE(fmsub_h); + DEFINE_FR3TYPE(fnmadd_h); + DEFINE_FR3TYPE(fnmsub_h); + DEFINE_FRTYPE(fsgnj_h); + DEFINE_FRTYPE(fsgnjn_h); + DEFINE_FRTYPE(fsgnjx_h); + DEFINE_FR1TYPE(fcvt_h_s); + DEFINE_FR1TYPE(fcvt_h_d); + DEFINE_FR1TYPE(fcvt_h_q); + DEFINE_FR1TYPE(fcvt_s_h); + DEFINE_FR1TYPE(fcvt_d_h); + DEFINE_FR1TYPE(fcvt_q_h); + DEFINE_XFTYPE(fcvt_h_l); + DEFINE_XFTYPE(fcvt_h_lu); + DEFINE_XFTYPE(fcvt_h_w); + DEFINE_XFTYPE(fcvt_h_wu); + 
DEFINE_XFTYPE(fcvt_h_wu); + DEFINE_XFTYPE(fmv_h_x); + DEFINE_FXTYPE(fcvt_l_h); + DEFINE_FXTYPE(fcvt_lu_h); + DEFINE_FXTYPE(fcvt_w_h); + DEFINE_FXTYPE(fcvt_wu_h); + DEFINE_FXTYPE(fclass_h); + DEFINE_FXTYPE(fmv_x_h); + DEFINE_FX2TYPE(feq_h); + DEFINE_FX2TYPE(flt_h); + DEFINE_FX2TYPE(fle_h); + + DEFINE_FRTYPE(fadd_q); + DEFINE_FRTYPE(fsub_q); + DEFINE_FRTYPE(fmul_q); + DEFINE_FRTYPE(fdiv_q); + DEFINE_FR1TYPE(fsqrt_q); + DEFINE_FRTYPE(fmin_q); + DEFINE_FRTYPE(fmax_q); + DEFINE_FR3TYPE(fmadd_q); + DEFINE_FR3TYPE(fmsub_q); + DEFINE_FR3TYPE(fnmadd_q); + DEFINE_FR3TYPE(fnmsub_q); + DEFINE_FRTYPE(fsgnj_q); + DEFINE_FRTYPE(fsgnjn_q); + DEFINE_FRTYPE(fsgnjx_q); + DEFINE_FR1TYPE(fcvt_q_s); + DEFINE_FR1TYPE(fcvt_q_d); + DEFINE_XFTYPE(fcvt_q_l); + DEFINE_XFTYPE(fcvt_q_lu); + DEFINE_XFTYPE(fcvt_q_w); + DEFINE_XFTYPE(fcvt_q_wu); + DEFINE_XFTYPE(fcvt_q_wu); + //DEFINE_XFTYPE(fmv_q_x); + DEFINE_FXTYPE(fcvt_l_q); + DEFINE_FXTYPE(fcvt_lu_q); + DEFINE_FXTYPE(fcvt_w_q); + DEFINE_FXTYPE(fcvt_wu_q); + DEFINE_FXTYPE(fclass_q); + //DEFINE_FXTYPE(fmv_x_q); + DEFINE_FX2TYPE(feq_q); + DEFINE_FX2TYPE(flt_q); + DEFINE_FX2TYPE(fle_q); + + DISASM_INSN("c.ebreak", c_add, mask_rd | mask_rvc_rs2, {}); + add_insn(new disasm_insn_t("ret", match_c_jr | match_rd_ra, mask_c_jr | mask_rd | mask_rvc_imm, {})); + DISASM_INSN("c.jr", c_jr, mask_rvc_imm, {&rvc_rs1}); + DISASM_INSN("c.jalr", c_jalr, mask_rvc_imm, {&rvc_rs1}); + DISASM_INSN("c.nop", c_addi, mask_rd | mask_rvc_imm, {}); + DISASM_INSN("c.addi16sp", c_addi16sp, mask_rd, {&rvc_sp, &rvc_addi16sp_imm}); + DISASM_INSN("c.addi4spn", c_addi4spn, 0, {&rvc_rs2s, &rvc_sp, &rvc_addi4spn_imm}); + DISASM_INSN("c.li", c_li, 0, {&xrd, &rvc_imm}); + DISASM_INSN("c.lui", c_lui, 0, {&xrd, &rvc_uimm}); + DISASM_INSN("c.addi", c_addi, 0, {&xrd, &rvc_imm}); + DISASM_INSN("c.slli", c_slli, 0, {&rvc_rs1, &rvc_shamt}); + DISASM_INSN("c.srli", c_srli, 0, {&rvc_rs1s, &rvc_shamt}); + DISASM_INSN("c.srai", c_srai, 0, {&rvc_rs1s, &rvc_shamt}); + DISASM_INSN("c.andi", c_andi, 
0, {&rvc_rs1s, &rvc_imm}); + DISASM_INSN("c.mv", c_mv, 0, {&xrd, &rvc_rs2}); + DISASM_INSN("c.add", c_add, 0, {&xrd, &rvc_rs2}); + DISASM_INSN("c.addw", c_addw, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.sub", c_sub, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.subw", c_subw, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.and", c_and, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.or", c_or, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.xor", c_xor, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.lwsp", c_lwsp, 0, {&xrd, &rvc_lwsp_address}); + DISASM_INSN("c.fld", c_fld, 0, {&rvc_fp_rs2s, &rvc_ld_address}); + DISASM_INSN("c.swsp", c_swsp, 0, {&rvc_rs2, &rvc_swsp_address}); + DISASM_INSN("c.lw", c_lw, 0, {&rvc_rs2s, &rvc_lw_address}); + DISASM_INSN("c.sw", c_sw, 0, {&rvc_rs2s, &rvc_lw_address}); + DISASM_INSN("c.beqz", c_beqz, 0, {&rvc_rs1s, &rvc_branch_target}); + DISASM_INSN("c.bnez", c_bnez, 0, {&rvc_rs1s, &rvc_branch_target}); + DISASM_INSN("c.j", c_j, 0, {&rvc_jump_target}); + DISASM_INSN("c.fldsp", c_fldsp, 0, {&frd, &rvc_ldsp_address}); + DISASM_INSN("c.fsd", c_fsd, 0, {&rvc_fp_rs2s, &rvc_ld_address}); + DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2, &rvc_sdsp_address}); + +#ifdef VECTOR_EXT + + DISASM_INSN("vsetvli", vsetvli, 0, {&xrd, &xrs1, &v_vtype}); + //DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); + + #define DISASM_VMEM_INSN(name, fmt, ff) \ + add_insn(new disasm_insn_t(#name "8" #ff ".v", match_##name##8##ff##_v, mask_##name##8##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "16" #ff ".v", match_##name##16##ff##_v, mask_##name##16##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "32" #ff ".v", match_##name##32##ff##_v, mask_##name##32##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "64" #ff ".v", match_##name##64##ff##_v, mask_##name##64##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "128" #ff ".v", match_##name##128##ff##_v, mask_##name##128##ff##_v | mask_nf, fmt)); \ + add_insn(new 
disasm_insn_t(#name "256" #ff ".v", match_##name##256##ff##_v, mask_##name##256##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "512" #ff ".v", match_##name##512##ff##_v, mask_##name##512##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "1024" #ff ".v", match_##name##1024##ff##_v, mask_##name##1024##ff##_v | mask_nf, fmt)); \ + + std::vector v_ld_unit = {&vd, &v_address, &opt, &vm}; + std::vector v_st_unit = {&vs3, &v_address, &opt, &vm}; + std::vector v_ld_stride = {&vd, &v_address, &xrs2, &opt, &vm}; + std::vector v_st_stride = {&vs3, &v_address, &xrs2, &opt, &vm}; + std::vector v_ld_index = {&vd, &v_address, &vs2, &opt, &vm}; + std::vector v_st_index = {&vs3, &v_address, &vs2, &opt, &vm}; + + DISASM_VMEM_INSN(vle, v_ld_unit, ); + DISASM_VMEM_INSN(vlse, v_ld_stride, ); + DISASM_VMEM_INSN(vlxei, v_ld_index, ); + DISASM_VMEM_INSN(vle, v_ld_unit, ff); + DISASM_VMEM_INSN(vse, v_st_unit, ); + DISASM_VMEM_INSN(vsse, v_st_stride, ); + DISASM_VMEM_INSN(vsxei, v_st_index, ); + DISASM_VMEM_INSN(vsuxei, v_st_index, ); + + #undef DISASM_VMEM_INSN + + // handle vector segment load/store + for (size_t elt = 0; elt <= 7; ++elt) { + const custom_fmt_t template_insn[] = { + {match_vle8_v, mask_vle8_v, "vlseg%de%d.v", v_ld_unit}, + {match_vse8_v, mask_vse8_v, "vsseg%de%d.v", v_st_unit}, + + {match_vlse8_v, mask_vlse8_v, "vlsseg%de%d.v", v_ld_stride}, + {match_vsse8_v, mask_vsse8_v, "vssseg%de%d.v", v_st_stride}, + + {match_vlxei8_v, mask_vlxei8_v, "vlxseg%dei%d.v", v_ld_index}, + {match_vsxei8_v, mask_vsxei8_v, "vsxseg%dei%d.v", v_st_index}, + + {match_vle8ff_v, mask_vle8ff_v, "vlseg%de%dff.v", v_ld_unit} + }; + + reg_t elt_map[] = {0x00000000, 0x00005000, 0x00006000, 0x00007000, + 0x10000000, 0x10005000, 0x10006000, 0x10007000}; + + for (size_t nf = 1; nf <= 7; ++nf) { + for (auto item : template_insn) { + const reg_t match_nf = nf << 29; + char buf[128]; + sprintf(buf, item.fmt, nf + 1, 8 << elt); + add_insn(new disasm_insn_t( + buf, + ((item.match | 
match_nf) & ~mask_vldst) | elt_map[elt], + item.mask | mask_nf, + item.arg + )); + } + } + + //handle whole register load + if (elt >= 4) + continue; + + const custom_fmt_t template_insn2[] = { + {match_vl1re8_v, mask_vl1re8_v, "vl%dre%d.v", v_ld_unit}, + }; + + for (reg_t i = 0, nf = 7; i < 4; i++, nf >>= 1) { + for (auto item : template_insn2) { + const reg_t match_nf = nf << 29; + char buf[128]; + sprintf(buf, item.fmt, nf + 1, 8 << elt); + add_insn(new disasm_insn_t( + buf, + item.match | match_nf | elt_map[elt], + item.mask | mask_nf, + item.arg + )); + } + } + } + + #define DISASM_ST_WHOLE_INSN(name, nf) \ + add_insn(new disasm_insn_t(#name, match_vs1r_v | (nf << 29), \ + mask_vs1r_v | mask_nf, \ + {&vs3, &v_address})); + DISASM_ST_WHOLE_INSN(vs1r.v, 0); + DISASM_ST_WHOLE_INSN(vs2r.v, 1); + DISASM_ST_WHOLE_INSN(vs4r.v, 3); + DISASM_ST_WHOLE_INSN(vs8r.v, 7); + + #undef DISASM_ST_WHOLE_INSN + + #define DISASM_OPIV_VXI_INSN(name, sign, suf) \ + add_insn(new disasm_insn_t(#name "." #suf "v", \ + match_##name##_##suf##v, mask_##name##_##suf##v, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "." #suf "x", \ + match_##name##_##suf##x, mask_##name##_##suf##x, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + if (sign) \ + add_insn(new disasm_insn_t(#name "." #suf "i", \ + match_##name##_##suf##i, mask_##name##_##suf##i, \ + {&vd, &vs2, &v_simm5, &opt, &vm})); \ + else \ + add_insn(new disasm_insn_t(#name "." 
#suf "i", \ + match_##name##_##suf##i, mask_##name##_##suf##i, \ + {&vd, &vs2, &zimm5, &opt, &vm})); + + #define DISASM_OPIV_VX__INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + + #define DISASM_OPIV__XI_INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + if (sign) \ + add_insn(new disasm_insn_t(#name ".vi", match_##name##_vi, mask_##name##_vi, \ + {&vd, &vs2, &v_simm5, &opt, &vm})); \ + else \ + add_insn(new disasm_insn_t(#name ".vi", match_##name##_vi, mask_##name##_vi, \ + {&vd, &vs2, &zimm5, &opt, &vm})); + + #define DISASM_OPIV_V___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_S___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vs", match_##name##_vs, mask_##name##_vs, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_W___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".wv", match_##name##_wv, mask_##name##_wv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".wx", match_##name##_wx, mask_##name##_wx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); + + #define DISASM_OPIV_M___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".mm", match_##name##_mm, mask_##name##_mm, \ + {&vd, &vs2, &vs1})); + + #define DISASM_OPIV__X__INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); + + #define DISASM_OPIV_VXIM_INSN(name, sign, has_vm) \ + add_insn(new disasm_insn_t(#name ".vvm", match_##name##_vvm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vxm", match_##name##_vxm, \ + mask_##name##_vxm | 
mask_vm, \ + {&vd, &vs2, &xrs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vim", match_##name##_vim, \ + mask_##name##_vim | mask_vm, \ + {&vd, &vs2, &v_simm5, &v0})); \ + if (has_vm) { \ + add_insn(new disasm_insn_t(#name ".vv", \ + match_##name##_vvm | mask_vm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1})); \ + add_insn(new disasm_insn_t(#name ".vx", \ + match_##name##_vxm | mask_vm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1})); \ + add_insn(new disasm_insn_t(#name ".vi", \ + match_##name##_vim | mask_vm, \ + mask_##name##_vim | mask_vm, \ + {&vd, &vs2, &v_simm5})); \ + } + + #define DISASM_OPIV_VX_M_INSN(name, sign, has_vm) \ + add_insn(new disasm_insn_t(#name ".vvm", match_##name##_vvm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vxm", match_##name##_vxm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1, &v0})); \ + if (has_vm) { \ + add_insn(new disasm_insn_t(#name ".vv", \ + match_##name##_vvm | mask_vm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1})); \ + add_insn(new disasm_insn_t(#name ".vx", \ + match_##name##_vxm | mask_vm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1})); \ + } \ + + //OPFVV/OPFVF + //0b00_0000 + DISASM_OPIV_VXI_INSN(vadd, 1, v); + DISASM_OPIV_VX__INSN(vsub, 1); + DISASM_OPIV__XI_INSN(vrsub, 1); + DISASM_OPIV_VX__INSN(vminu, 0); + DISASM_OPIV_VX__INSN(vmin, 1); + DISASM_OPIV_VX__INSN(vmaxu, 1); + DISASM_OPIV_VX__INSN(vmax, 0); + DISASM_OPIV_VXI_INSN(vand, 1, v); + DISASM_OPIV_VXI_INSN(vor, 1, v); + DISASM_OPIV_VXI_INSN(vxor, 1, v); + DISASM_OPIV_VXI_INSN(vrgather, 0, v); + DISASM_OPIV_V___INSN(vrgatherei16, 0); + DISASM_OPIV__XI_INSN(vslideup, 0); + DISASM_OPIV__XI_INSN(vslidedown, 0); + + //0b01_0000 + //DISASM_OPIV_VXIM_INSN(vadc, 1, 0); + DISASM_OPIV_VXIM_INSN(vmadc, 1, 1); + //DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); + DISASM_OPIV_VX_M_INSN(vmsbc, 1, 1); + DISASM_OPIV_VXIM_INSN(vmerge, 1, 0); + DISASM_INSN("vmv.v.i", vmv_v_i, 0, 
{&vd, &v_simm5}); + DISASM_INSN("vmv.v.v", vmv_v_v, 0, {&vd, &vs1}); + DISASM_INSN("vmv.v.x", vmv_v_x, 0, {&vd, &xrs1}); + DISASM_OPIV_VXI_INSN(vmseq, 1, v); + DISASM_OPIV_VXI_INSN(vmsne, 1, v); + DISASM_OPIV_VX__INSN(vmsltu, 0); + DISASM_OPIV_VX__INSN(vmslt, 1); + DISASM_OPIV_VXI_INSN(vmsleu, 0, v); + DISASM_OPIV_VXI_INSN(vmsle, 1, v); + DISASM_OPIV__XI_INSN(vmsgtu, 0); + DISASM_OPIV__XI_INSN(vmsgt, 1); + + //0b10_0000 + DISASM_OPIV_VXI_INSN(vsaddu, 0, v); + DISASM_OPIV_VXI_INSN(vsadd, 1, v); + DISASM_OPIV_VX__INSN(vssubu, 0); + DISASM_OPIV_VX__INSN(vssub, 1); + DISASM_OPIV_VXI_INSN(vsll, 1, v); + DISASM_INSN("vmv1r.v", vmv1r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv2r.v", vmv2r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv4r.v", vmv4r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv8r.v", vmv8r_v, 0, {&vd, &vs2}); + DISASM_OPIV_VX__INSN(vsmul, 1); + DISASM_OPIV_VXI_INSN(vsrl, 0, v); + DISASM_OPIV_VXI_INSN(vsra, 0, v); + DISASM_OPIV_VXI_INSN(vssrl, 0, v); + DISASM_OPIV_VXI_INSN(vssra, 0, v); + DISASM_OPIV_VXI_INSN(vnsrl, 0, w); + DISASM_OPIV_VXI_INSN(vnsra, 0, w); + DISASM_OPIV_VXI_INSN(vnclipu, 0, w); + DISASM_OPIV_VXI_INSN(vnclip, 0, w); + + //0b11_0000 + DISASM_OPIV_S___INSN(vwredsumu, 0); + DISASM_OPIV_S___INSN(vwredsum, 1); + DISASM_OPIV_V___INSN(vdotu, 0); + DISASM_OPIV_V___INSN(vdot, 1); + + //OPMVV/OPMVX + //0b00_0000 + //DISASM_OPIV_VX__INSN(vaaddu, 0); + DISASM_OPIV_VX__INSN(vaadd, 0); + //DISASM_OPIV_VX__INSN(vasubu, 0); + DISASM_OPIV_VX__INSN(vasub, 0); + + DISASM_OPIV_S___INSN(vredsum, 1); + DISASM_OPIV_S___INSN(vredand, 1); + DISASM_OPIV_S___INSN(vredor, 1); + DISASM_OPIV_S___INSN(vredxor, 1); + DISASM_OPIV_S___INSN(vredminu, 0); + DISASM_OPIV_S___INSN(vredmin, 1); + DISASM_OPIV_S___INSN(vredmaxu, 0); + DISASM_OPIV_S___INSN(vredmax, 1); + //DISASM_OPIV__X__INSN(vslide1up, 1); + DISASM_OPIV__X__INSN(vslide1down,1); + + //0b01_0000 + //VWXUNARY0 + DISASM_INSN("vmv.x.s", vmv_x_s, 0, {&xrd, &vs2}); + DISASM_INSN("vpopc.m", vpopc_m, 0, {&xrd, &vs2, &opt, &vm}); + 
DISASM_INSN("vfirst.m", vfirst_m, 0, {&xrd, &vs2, &opt, &vm}); + + //VRXUNARY0 + DISASM_INSN("vmv.s.x", vmv_s_x, 0, {&vd, &xrs1}); + + //VXUNARY0 + DISASM_INSN("vzext.vf2", vzext_vf2, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf2", vsext_vf2, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vzext.vf4", vzext_vf4, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf4", vsext_vf4, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vzext.vf8", vzext_vf8, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf8", vsext_vf8, 0, {&vd, &vs2, &opt, &vm}); + + //VMUNARY0 + DISASM_INSN("vmsbf.m", vmsbf_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vmsof.m", vmsof_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vmsif.m", vmsif_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("viota.m", viota_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vid.v", vid_v, 0, {&vd, &opt, &vm}); + + DISASM_INSN("vid.v", vid_v, 0, {&vd, &opt, &vm}); + + DISASM_INSN("vcompress.vm", vcompress_vm, 0, {&vd, &vs2, &vs1}); + + DISASM_OPIV_M___INSN(vmandnot, 1); + DISASM_OPIV_M___INSN(vmand, 1); + DISASM_OPIV_M___INSN(vmor, 1); + DISASM_OPIV_M___INSN(vmxor, 1); + DISASM_OPIV_M___INSN(vmornot, 1); + DISASM_OPIV_M___INSN(vmnand, 1); + DISASM_OPIV_M___INSN(vmnor, 1); + DISASM_OPIV_M___INSN(vmxnor, 1); + + //0b10_0000 + //DISASM_OPIV_VX__INSN(vdivu, 0); + DISASM_OPIV_VX__INSN(vdiv, 1); + DISASM_OPIV_VX__INSN(vremu, 0); + DISASM_OPIV_VX__INSN(vrem, 1); + //DISASM_OPIV_VX__INSN(vmulhu, 0); + DISASM_OPIV_VX__INSN(vmul, 1); + //DISASM_OPIV_VX__INSN(vmulhsu, 0); + DISASM_OPIV_VX__INSN(vmulh, 1); + DISASM_OPIV_VX__INSN(vmadd, 1); + DISASM_OPIV_VX__INSN(vnmsub, 1); + DISASM_OPIV_VX__INSN(vmacc, 1); + DISASM_OPIV_VX__INSN(vnmsac, 1); + + //0b11_0000 + DISASM_OPIV_VX__INSN(vwaddu, 0); + DISASM_OPIV_VX__INSN(vwadd, 1); + DISASM_OPIV_VX__INSN(vwsubu, 0); + DISASM_OPIV_VX__INSN(vwsub, 1); + DISASM_OPIV_W___INSN(vwaddu, 0); + DISASM_OPIV_W___INSN(vwadd, 1); + DISASM_OPIV_W___INSN(vwsubu, 0); + DISASM_OPIV_W___INSN(vwsub, 1); + 
DISASM_OPIV_VX__INSN(vwmulu, 0); + DISASM_OPIV_VX__INSN(vwmulsu, 0); + DISASM_OPIV_VX__INSN(vwmul, 1); + DISASM_OPIV_VX__INSN(vwmaccu, 0); + DISASM_OPIV_VX__INSN(vwmacc, 1); + DISASM_OPIV__X__INSN(vwmaccus, 1); + DISASM_OPIV_VX__INSN(vwmaccsu, 0); + + #undef DISASM_OPIV_VXI_INSN + #undef DISASM_OPIV_VX__INSN + #undef DISASM_OPIV__XI_INSN + #undef DISASM_OPIV_V___INSN + #undef DISASM_OPIV_S___INSN + #undef DISASM_OPIV_W___INSN + #undef DISASM_OPIV_M___INSN + #undef DISASM_OPIV__X__INSN + #undef DISASM_OPIV_VXIM_INSN + #undef DISASM_OPIV_VX_M_INSN + + #define DISASM_OPIV_VF_INSN(name) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_OPIV_WF_INSN(name) \ + add_insn(new disasm_insn_t(#name ".wv", match_##name##_wv, mask_##name##_wv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".wf", match_##name##_wf, mask_##name##_wf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_OPIV_V__INSN(name) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_S__INSN(name) \ + add_insn(new disasm_insn_t(#name ".vs", match_##name##_vs, mask_##name##_vs, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV__F_INSN(name) \ + add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_VFUNARY0_INSN(name, suf) \ + add_insn(new disasm_insn_t(#name "cvt.xu.f." #suf, \ + match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.x.f." #suf, \ + match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.f.xu." 
#suf, \ + match_##name##cvt_f_xu_##suf, mask_##name##cvt_f_xu_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.f.x." #suf, \ + match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.rtz.xu.f." #suf, \ + match_##name##cvt_rtz_xu_f_##suf, mask_##name##cvt_rtz_xu_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.rtz.x.f." #suf, \ + match_##name##cvt_rtz_x_f_##suf, mask_##name##cvt_rtz_x_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + + //OPFVV/OPFVF + //0b00_0000 + DISASM_OPIV_VF_INSN(vfadd); + DISASM_OPIV_S__INSN(vfredsum); + DISASM_OPIV_VF_INSN(vfsub); + DISASM_OPIV_S__INSN(vfredosum); + DISASM_OPIV_VF_INSN(vfmin); + DISASM_OPIV_S__INSN(vfredmin); + DISASM_OPIV_VF_INSN(vfmax); + DISASM_OPIV_S__INSN(vfredmax); + DISASM_OPIV_VF_INSN(vfsgnj); + DISASM_OPIV_VF_INSN(vfsgnjn); + DISASM_OPIV_VF_INSN(vfsgnjx); + DISASM_INSN("vfmv.f.s", vfmv_f_s, 0, {&frd, &vs2}); + DISASM_INSN("vfmv.s.f", vfmv_s_f, mask_vfmv_s_f, {&vd, &frs1}); + DISASM_OPIV__F_INSN(vfslide1up); + DISASM_OPIV__F_INSN(vfslide1down); + + //0b01_0000 + DISASM_INSN("vfmerge.vfm", vfmerge_vfm, 0, {&vd, &vs2, &frs1, &v0}); + DISASM_INSN("vfmv.v.f", vfmv_v_f, 0, {&vd, &frs1}); + DISASM_OPIV_VF_INSN(vmfeq); + DISASM_OPIV_VF_INSN(vmfle); + DISASM_OPIV_VF_INSN(vmflt); + DISASM_OPIV_VF_INSN(vmfne); + DISASM_OPIV__F_INSN(vmfgt); + DISASM_OPIV__F_INSN(vmfge); + + //0b10_0000 + DISASM_OPIV_VF_INSN(vfdiv); + DISASM_OPIV__F_INSN(vfrdiv); + + //vfunary0 + //DISASM_VFUNARY0_INSN(vf, v); + + DISASM_VFUNARY0_INSN(vfw, v); + DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm}); + + DISASM_VFUNARY0_INSN(vfn, w); + DISASM_INSN("vfncvt.f.f.w", vfncvt_f_f_w, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfncvt.rod.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm}); + + //vfunary1 + DISASM_INSN("vfsqrt.v", vfsqrt_v, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfrsqrte7.v", vfrsqrte7_v, 0, 
{&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfrece7.v", vfrece7_v, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfclass.v", vfclass_v, 0, {&vd, &vs2, &opt, &vm}); + + DISASM_OPIV_VF_INSN(vfmul); + DISASM_OPIV__F_INSN(vfrsub); + DISASM_OPIV_VF_INSN(vfmadd); + DISASM_OPIV_VF_INSN(vfnmadd); + DISASM_OPIV_VF_INSN(vfmsub); + DISASM_OPIV_VF_INSN(vfnmsub); + DISASM_OPIV_VF_INSN(vfmacc); + DISASM_OPIV_VF_INSN(vfnmacc); + DISASM_OPIV_VF_INSN(vfmsac); + DISASM_OPIV_VF_INSN(vfnmsac); + + //0b11_0000 + DISASM_OPIV_VF_INSN(vfwadd); + DISASM_OPIV_S__INSN(vfwredsum); + DISASM_OPIV_VF_INSN(vfwsub); + DISASM_OPIV_S__INSN(vfwredosum); + DISASM_OPIV_WF_INSN(vfwadd); + DISASM_OPIV_WF_INSN(vfwsub); + DISASM_OPIV_VF_INSN(vfwmul); + DISASM_OPIV_V__INSN(vfdot); + DISASM_OPIV_VF_INSN(vfwmacc); + DISASM_OPIV_VF_INSN(vfwnmacc); + DISASM_OPIV_VF_INSN(vfwmsac); + DISASM_OPIV_VF_INSN(vfwnmsac); + + #undef DISASM_OPIV_VF_INSN + #undef DISASM_OPIV_V__INSN + #undef DISASM_OPIV__F_INSN + #undef DISASM_OPIV_S__INSN + #undef DISASM_OPIV_W__INSN + #undef DISASM_VFUNARY0_INSN + + // vector amo + std::vector v_fmt_amo_wd = {&vd, &v_address, &vs2, &vd, &opt, &vm}; + std::vector v_fmt_amo = {&x0, &v_address, &vs2, &vd, &opt, &vm}; + for (size_t elt = 0; elt <= 3; ++elt) { + const custom_fmt_t template_insn[] = { + {match_vamoaddei8_v | mask_wd, mask_vamoaddei8_v | mask_wd, + "%sei%d.v", v_fmt_amo_wd}, + {match_vamoaddei8_v, mask_vamoaddei8_v | mask_wd, + "%sei%d.v", v_fmt_amo}, + }; + std::pair amo_map[] = { + {"vamoswap", 0x01ul << 27}, + {"vamoadd", 0x00ul << 27}, + {"vamoxor", 0x04ul << 27}, + {"vamoand", 0x0cul << 27}, + {"vamoor", 0x08ul << 27}, + {"vamomin", 0x10ul << 27}, + {"vamomax", 0x14ul << 27}, + {"vamominu", 0x18ul << 27}, + {"vamomaxu", 0x1cul << 27}}; + const reg_t elt_map[] = {0x0ul << 12, 0x5ul << 12, + 0x6ul <<12, 0x7ul << 12}; + + for (size_t idx = 0; idx < sizeof(amo_map) / sizeof(amo_map[0]); ++idx) { + for (auto item : template_insn) { + char buf[128]; + sprintf(buf, item.fmt, 
amo_map[idx].first, 8 << elt); + add_insn(new disasm_insn_t(buf, + item.match | amo_map[idx].second | elt_map[elt], + item.mask, + item.arg)); + } + } + } + +#endif + + if (xlen == 32) { + DISASM_INSN("c.flw", c_flw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); + DISASM_INSN("c.flwsp", c_flwsp, 0, {&frd, &rvc_lwsp_address}); + DISASM_INSN("c.fsw", c_fsw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); + DISASM_INSN("c.fswsp", c_fswsp, 0, {&rvc_fp_rs2, &rvc_swsp_address}); + DISASM_INSN("c.jal", c_jal, 0, {&rvc_jump_target}); + } else { + DISASM_INSN("c.ld", c_ld, 0, {&rvc_rs2s, &rvc_ld_address}); + DISASM_INSN("c.ldsp", c_ldsp, 0, {&xrd, &rvc_ldsp_address}); + DISASM_INSN("c.sd", c_sd, 0, {&rvc_rs2s, &rvc_ld_address}); + DISASM_INSN("c.sdsp", c_sdsp, 0, {&rvc_rs2, &rvc_sdsp_address}); + DISASM_INSN("c.addiw", c_addiw, 0, {&xrd, &rvc_imm}); + } + + // Xpulpimg extension + DEFINE_PLOAD_IRPOST(p_lb_irpost); + DEFINE_PLOAD_IRPOST(p_lbu_irpost); + DEFINE_PLOAD_IRPOST(p_lh_irpost); + DEFINE_PLOAD_IRPOST(p_lhu_irpost); + DEFINE_PLOAD_IRPOST(p_lw_irpost); + DEFINE_PLOAD_RRPOST(p_lb_rrpost); + DEFINE_PLOAD_RRPOST(p_lbu_rrpost); + DEFINE_PLOAD_RRPOST(p_lh_rrpost); + DEFINE_PLOAD_RRPOST(p_lhu_rrpost); + DEFINE_PLOAD_RRPOST(p_lw_rrpost); + DEFINE_PLOAD_RR(p_lb_rr); + DEFINE_PLOAD_RR(p_lbu_rr); + DEFINE_PLOAD_RR(p_lh_rr); + DEFINE_PLOAD_RR(p_lhu_rr); + DEFINE_PLOAD_RR(p_lw_rr); + DEFINE_PSTORE_IRPOST(p_sb_irpost); + DEFINE_PSTORE_IRPOST(p_sh_irpost); + DEFINE_PSTORE_IRPOST(p_sw_irpost); + DEFINE_PSTORE_RRPOST(p_sb_rrpost); + DEFINE_PSTORE_RRPOST(p_sh_rrpost); + DEFINE_PSTORE_RRPOST(p_sw_rrpost); + DEFINE_PSTORE_RR(p_sb_rr); + DEFINE_PSTORE_RR(p_sh_rr); + DEFINE_PSTORE_RR(p_sw_rr); + DEFINE_R1TYPE(p_abs); + DEFINE_RTYPE(p_slet); + DEFINE_RTYPE(p_sletu); + DEFINE_RTYPE(p_min); + DEFINE_RTYPE(p_minu); + DEFINE_RTYPE(p_max); + DEFINE_RTYPE(p_maxu); + DEFINE_R1TYPE(p_exths); + DEFINE_R1TYPE(p_exthz); + DEFINE_R1TYPE(p_extbs); + DEFINE_R1TYPE(p_extbz); + DEFINE_PI0TYPE(p_clip); + 
DEFINE_PI0TYPE(p_clipu); + DEFINE_RTYPE(p_clipr); + DEFINE_RTYPE(p_clipur); + DEFINE_PBTYPE(p_beqimm); + DEFINE_PBTYPE(p_bneimm); + DEFINE_RTYPE(p_mac); + DEFINE_RTYPE(p_msu); + + DEFINE_RTYPE(pv_add_h); + DEFINE_RTYPE(pv_add_sc_h); + DEFINE_PI1STYPE(pv_add_sci_h); + DEFINE_RTYPE(pv_add_b); + DEFINE_RTYPE(pv_add_sc_b); + DEFINE_PI1STYPE(pv_add_sci_b); + DEFINE_RTYPE(pv_sub_h); + DEFINE_RTYPE(pv_sub_sc_h); + DEFINE_PI1STYPE(pv_sub_sci_h); + DEFINE_RTYPE(pv_sub_b); + DEFINE_RTYPE(pv_sub_sc_b); + DEFINE_PI1STYPE(pv_sub_sci_b); + DEFINE_RTYPE(pv_avg_h); + DEFINE_RTYPE(pv_avg_sc_h); + DEFINE_PI1STYPE(pv_avg_sci_h); + DEFINE_RTYPE(pv_avg_b); + DEFINE_RTYPE(pv_avg_sc_b); + DEFINE_PI1STYPE(pv_avg_sci_b); + DEFINE_RTYPE(pv_avgu_h); + DEFINE_RTYPE(pv_avgu_sc_h); + DEFINE_PI1ZTYPE(pv_avgu_sci_h); + DEFINE_RTYPE(pv_avgu_b); + DEFINE_RTYPE(pv_avgu_sc_b); + DEFINE_PI1ZTYPE(pv_avgu_sci_b); + DEFINE_RTYPE(pv_min_h); + DEFINE_RTYPE(pv_min_sc_h); + DEFINE_PI1STYPE(pv_min_sci_h); + DEFINE_RTYPE(pv_min_b); + DEFINE_RTYPE(pv_min_sc_b); + DEFINE_PI1STYPE(pv_min_sci_b); + DEFINE_RTYPE(pv_minu_h); + DEFINE_RTYPE(pv_minu_sc_h); + DEFINE_PI1ZTYPE(pv_minu_sci_h); + DEFINE_RTYPE(pv_minu_b); + DEFINE_RTYPE(pv_minu_sc_b); + DEFINE_PI1ZTYPE(pv_minu_sci_b); + DEFINE_RTYPE(pv_max_h); + DEFINE_RTYPE(pv_max_sc_h); + DEFINE_PI1STYPE(pv_max_sci_h); + DEFINE_RTYPE(pv_max_b); + DEFINE_RTYPE(pv_max_sc_b); + DEFINE_PI1STYPE(pv_max_sci_b); + DEFINE_RTYPE(pv_maxu_h); + DEFINE_RTYPE(pv_maxu_sc_h); + DEFINE_PI1ZTYPE(pv_maxu_sci_h); + DEFINE_RTYPE(pv_maxu_b); + DEFINE_RTYPE(pv_maxu_sc_b); + DEFINE_PI1ZTYPE(pv_maxu_sci_b); + DEFINE_RTYPE(pv_srl_h); + DEFINE_RTYPE(pv_srl_sc_h); + DEFINE_PI1ZTYPE(pv_srl_sci_h); + DEFINE_RTYPE(pv_srl_b); + DEFINE_RTYPE(pv_srl_sc_b); + DEFINE_PI1ZTYPE(pv_srl_sci_b); + DEFINE_RTYPE(pv_sra_h); + DEFINE_RTYPE(pv_sra_sc_h); + DEFINE_PI1ZTYPE(pv_sra_sci_h); + DEFINE_RTYPE(pv_sra_b); + DEFINE_RTYPE(pv_sra_sc_b); + DEFINE_PI1ZTYPE(pv_sra_sci_b); + DEFINE_RTYPE(pv_sll_h); + 
DEFINE_RTYPE(pv_sll_sc_h); + DEFINE_PI1ZTYPE(pv_sll_sci_h); + DEFINE_RTYPE(pv_sll_b); + DEFINE_RTYPE(pv_sll_sc_b); + DEFINE_PI1ZTYPE(pv_sll_sci_b); + DEFINE_RTYPE(pv_or_h); + DEFINE_RTYPE(pv_or_sc_h); + DEFINE_PI1ZTYPE(pv_or_sci_h); + DEFINE_RTYPE(pv_or_b); + DEFINE_RTYPE(pv_or_sc_b); + DEFINE_PI1ZTYPE(pv_or_sci_b); + DEFINE_RTYPE(pv_xor_h); + DEFINE_RTYPE(pv_xor_sc_h); + DEFINE_PI1ZTYPE(pv_xor_sci_h); + DEFINE_RTYPE(pv_xor_b); + DEFINE_RTYPE(pv_xor_sc_b); + DEFINE_PI1ZTYPE(pv_xor_sci_b); + DEFINE_RTYPE(pv_and_h); + DEFINE_RTYPE(pv_and_sc_h); + DEFINE_PI1ZTYPE(pv_and_sci_h); + DEFINE_RTYPE(pv_and_b); + DEFINE_RTYPE(pv_and_sc_b); + DEFINE_PI1ZTYPE(pv_and_sci_b); + DEFINE_R1TYPE(pv_abs_h); + DEFINE_R1TYPE(pv_abs_b); + DEFINE_PI1ZTYPE(pv_extract_h); + DEFINE_PI1ZTYPE(pv_extract_b); + DEFINE_PI1ZTYPE(pv_extractu_h); + DEFINE_PI1ZTYPE(pv_extractu_b); + DEFINE_PI1ZTYPE(pv_insert_h); + DEFINE_PI1ZTYPE(pv_insert_b); + + DEFINE_RTYPE(pv_dotup_h); + DEFINE_RTYPE(pv_dotup_sc_h); + DEFINE_PI1ZTYPE(pv_dotup_sci_h); + DEFINE_RTYPE(pv_dotup_b); + DEFINE_RTYPE(pv_dotup_sc_b); + DEFINE_PI1ZTYPE(pv_dotup_sci_b); + DEFINE_RTYPE(pv_dotusp_h); + DEFINE_RTYPE(pv_dotusp_sc_h); + DEFINE_PI1ZTYPE(pv_dotusp_sci_h); + DEFINE_RTYPE(pv_dotusp_b); + DEFINE_RTYPE(pv_dotusp_sc_b); + DEFINE_PI1ZTYPE(pv_dotusp_sci_b); + DEFINE_RTYPE(pv_dotsp_h); + DEFINE_RTYPE(pv_dotsp_sc_h); + DEFINE_PI1ZTYPE(pv_dotsp_sci_h); + DEFINE_RTYPE(pv_dotsp_b); + DEFINE_RTYPE(pv_dotsp_sc_b); + DEFINE_PI1ZTYPE(pv_dotsp_sci_b); + DEFINE_RTYPE(pv_sdotup_h); + DEFINE_RTYPE(pv_sdotup_sc_h); + DEFINE_PI1ZTYPE(pv_sdotup_sci_h); + DEFINE_RTYPE(pv_sdotup_b); + DEFINE_RTYPE(pv_sdotup_sc_b); + DEFINE_PI1ZTYPE(pv_sdotup_sci_b); + DEFINE_RTYPE(pv_sdotusp_h); + DEFINE_RTYPE(pv_sdotusp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_h); + DEFINE_RTYPE(pv_sdotusp_b); + DEFINE_RTYPE(pv_sdotusp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotusp_sci_b); + DEFINE_RTYPE(pv_sdotsp_h); + DEFINE_RTYPE(pv_sdotsp_sc_h); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_h); + 
DEFINE_RTYPE(pv_sdotsp_b); + DEFINE_RTYPE(pv_sdotsp_sc_b); + DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + + DEFINE_RTYPE(pv_shuffle2_h); + DEFINE_RTYPE(pv_shuffle2_b); + + // provide a default disassembly for all instructions as a fallback + #define DECLARE_INSN(code, match, mask) \ + add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); + #include "encoding.h" + #undef DECLARE_INSN +} + +const disasm_insn_t* disassembler_t::lookup(insn_t insn) const +{ + size_t idx = insn.bits() % HASH_SIZE; + for (size_t j = 0; j < chain[idx].size(); j++) + if(*chain[idx][j] == insn) + return chain[idx][j]; + + idx = HASH_SIZE; + for (size_t j = 0; j < chain[idx].size(); j++) + if(*chain[idx][j] == insn) + return chain[idx][j]; + + return NULL; +} + +void NOINLINE disassembler_t::add_insn(disasm_insn_t* insn) +{ + size_t idx = HASH_SIZE; + if (insn->get_mask() % HASH_SIZE == HASH_SIZE - 1) + idx = insn->get_match() % HASH_SIZE; + chain[idx].push_back(insn); +} + +disassembler_t::~disassembler_t() +{ + for (size_t i = 0; i < HASH_SIZE+1; i++) + for (size_t j = 0; j < chain[i].size(); j++) + delete chain[i][j]; +} diff --git a/disasm/disasm.mk.in b/disasm/disasm.mk.in new file mode 100644 index 0000000000..039a717f90 --- /dev/null +++ b/disasm/disasm.mk.in @@ -0,0 +1,7 @@ +disasm_CFLAGS = -fPIC + +disasm_srcs = \ + disasm.cc \ + regnames.cc \ + +disasm_install_lib = yes diff --git a/riscv/regnames.cc b/disasm/regnames.cc similarity index 75% rename from riscv/regnames.cc rename to disasm/regnames.cc index 0bf8d9c6e9..0a7fd4d22c 100644 --- a/riscv/regnames.cc +++ b/disasm/regnames.cc @@ -16,6 +16,13 @@ const char* fpr_name[] = { "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11" }; +const char* vr_name[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" +}; + const char* csr_name(int which) { 
switch (which) { #define DECLARE_CSR(name, number) case number: return #name; diff --git a/dummy_rocc/dummy_rocc.mk.in b/dummy_rocc/dummy_rocc.mk.in deleted file mode 100644 index 0143ffd1c3..0000000000 --- a/dummy_rocc/dummy_rocc.mk.in +++ /dev/null @@ -1,7 +0,0 @@ -dummy_rocc_subproject_deps = \ - spike_main \ - riscv \ - softfloat \ - -dummy_rocc_srcs = \ - dummy_rocc.cc \ diff --git a/fdt/fdt.ac b/fdt/fdt.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fdt/fdt.c b/fdt/fdt.c new file mode 100644 index 0000000000..d6ce7c052d --- /dev/null +++ b/fdt/fdt.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +/* + * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks + * that the given buffer contains what appears to be a flattened + * device tree with sane information in its header. 
+ */ +int32_t fdt_ro_probe_(const void *fdt) +{ + uint32_t totalsize = fdt_totalsize(fdt); + + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + if (totalsize < INT32_MAX) + return totalsize; + else + return -FDT_ERR_TRUNCATED; +} + +static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off) +{ + return (off >= hdrsize) && (off <= totalsize); +} + +static int check_block_(uint32_t hdrsize, uint32_t totalsize, + uint32_t base, uint32_t size) +{ + if (!check_off_(hdrsize, totalsize, base)) + return 0; /* block start out of bounds */ + if ((base + size) < base) + return 0; /* overflow */ + if (!check_off_(hdrsize, totalsize, base + size)) + return 0; /* block end out of bounds */ + return 1; +} + +size_t fdt_header_size_(uint32_t version) +{ + if (version <= 1) + return FDT_V1_SIZE; + else if (version <= 2) + return FDT_V2_SIZE; + else if (version <= 3) + return FDT_V3_SIZE; + else if (version <= 16) + return FDT_V16_SIZE; + else + return FDT_V17_SIZE; +} + +int fdt_check_header(const void *fdt) +{ + size_t hdrsize; + + if (fdt_magic(fdt) != FDT_MAGIC) + return -FDT_ERR_BADMAGIC; + hdrsize = fdt_header_size(fdt); + if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + || (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION)) + return -FDT_ERR_BADVERSION; + if (fdt_version(fdt) < fdt_last_comp_version(fdt)) + return -FDT_ERR_BADVERSION; + + if ((fdt_totalsize(fdt) < hdrsize) + || (fdt_totalsize(fdt) > INT_MAX)) + return -FDT_ERR_TRUNCATED; + + /* Bounds check memrsv block */ + if (!check_off_(hdrsize, fdt_totalsize(fdt), fdt_off_mem_rsvmap(fdt))) + return 
-FDT_ERR_TRUNCATED; + + /* Bounds check structure block */ + if (fdt_version(fdt) < 17) { + if (!check_off_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } else { + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } + + /* Bounds check strings block */ + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_strings(fdt), fdt_size_dt_strings(fdt))) + return -FDT_ERR_TRUNCATED; + + return 0; +} + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) +{ + unsigned absoffset = offset + fdt_off_dt_struct(fdt); + + if ((absoffset < offset) + || ((absoffset + len) < absoffset) + || (absoffset + len) > fdt_totalsize(fdt)) + return NULL; + + if (fdt_version(fdt) >= 0x11) + if (((offset + len) < offset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + return fdt_offset_ptr_(fdt, offset); +} + +uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) +{ + const fdt32_t *tagp, *lenp; + uint32_t tag; + int offset = startoffset; + const char *p; + + *nextoffset = -FDT_ERR_TRUNCATED; + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (!tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + *nextoffset = -FDT_ERR_BADSTRUCTURE; + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (!p) + return FDT_END; /* premature end */ + break; + + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (!lenp) + return FDT_END; /* premature end */ + /* skip-name offset, length and value */ + offset += sizeof(struct fdt_property) - FDT_TAGSIZE + + fdt32_to_cpu(*lenp); + if (fdt_version(fdt) < 0x10 && fdt32_to_cpu(*lenp) >= 8 && + ((offset - fdt32_to_cpu(*lenp)) % 8) != 0) + offset += 4; + break; + + case FDT_END: + case FDT_END_NODE: + case FDT_NOP: + break; 
+ + default: + return FDT_END; + } + + if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset)) + return FDT_END; /* premature end */ + + *nextoffset = FDT_TAGALIGN(offset); + return tag; +} + +int fdt_check_node_offset_(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_check_prop_offset_(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_PROP)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = fdt_check_node_offset_(fdt, offset)) < 0) + return nextoffset; + + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + if (depth && ((--(*depth)) < 0)) + return nextoffset; + break; + + case FDT_END: + if ((nextoffset >= 0) + || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) + return -FDT_ERR_NOTFOUND; + else + return nextoffset; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + +int fdt_first_subnode(const void *fdt, int offset) +{ + int depth = 0; + + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth != 1) + return -FDT_ERR_NOTFOUND; + + return offset; +} + +int fdt_next_subnode(const void *fdt, int offset) +{ + int depth = 1; + + /* + * With respect to the parent, the depth of the next subnode will be + * the same as the last. 
+ */ + do { + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth < 1) + return -FDT_ERR_NOTFOUND; + } while (depth > 1); + + return offset; +} + +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + FDT_RO_PROBE(fdt); + + if (fdt_totalsize(fdt) > bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/fdt/fdt.h b/fdt/fdt.h new file mode 100644 index 0000000000..f2e68807f2 --- /dev/null +++ b/fdt/fdt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef FDT_H +#define FDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. 
+ */ + +#ifndef __ASSEMBLY__ + +struct fdt_header { + fdt32_t magic; /* magic word FDT_MAGIC */ + fdt32_t totalsize; /* total size of DT block */ + fdt32_t off_dt_struct; /* offset to structure */ + fdt32_t off_dt_strings; /* offset to strings */ + fdt32_t off_mem_rsvmap; /* offset to memory reserve map */ + fdt32_t version; /* format version */ + fdt32_t last_comp_version; /* last compatible version */ + + /* version 2 fields below */ + fdt32_t boot_cpuid_phys; /* Which physical CPU id we're + booting on */ + /* version 3 fields below */ + fdt32_t size_dt_strings; /* size of the strings block */ + + /* version 17 fields below */ + fdt32_t size_dt_struct; /* size of the structure block */ +}; + +struct fdt_reserve_entry { + fdt64_t address; + fdt64_t size; +}; + +struct fdt_node_header { + fdt32_t tag; + char name[0]; +}; + +struct fdt_property { + fdt32_t tag; + fdt32_t len; + fdt32_t nameoff; + char data[0]; +}; + +#endif /* !__ASSEMBLY */ + +#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */ +#define FDT_TAGSIZE sizeof(fdt32_t) + +#define FDT_BEGIN_NODE 0x1 /* Start node: full name */ +#define FDT_END_NODE 0x2 /* End node */ +#define FDT_PROP 0x3 /* Property: name off, + size, content */ +#define FDT_NOP 0x4 /* nop */ +#define FDT_END 0x9 + +#define FDT_V1_SIZE (7*sizeof(fdt32_t)) +#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(fdt32_t)) +#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(fdt32_t)) +#define FDT_V16_SIZE FDT_V3_SIZE +#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(fdt32_t)) + +#endif /* FDT_H */ diff --git a/fdt/fdt.mk.in b/fdt/fdt.mk.in new file mode 100644 index 0000000000..273375efb4 --- /dev/null +++ b/fdt/fdt.mk.in @@ -0,0 +1,17 @@ +fdt_subproject_deps = \ + +fdt_hdrs = \ + fdt.h \ + libfdt.h \ + libfdt_env.h \ + +fdt_c_srcs = \ + fdt.c \ + fdt_ro.c \ + fdt_wip.c \ + fdt_sw.c \ + fdt_rw.c \ + fdt_strerror.c \ + fdt_empty_tree.c \ + fdt_addresses.c \ + fdt_overlay.c \ diff --git a/fdt/fdt_addresses.c b/fdt/fdt_addresses.c new file mode 100644 index 
0000000000..9a82cd0ba2 --- /dev/null +++ b/fdt/fdt_addresses.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2014 David Gibson + * Copyright (C) 2018 embedded brains GmbH + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_cells(const void *fdt, int nodeoffset, const char *name) +{ + const fdt32_t *c; + uint32_t val; + int len; + + c = fdt_getprop(fdt, nodeoffset, name, &len); + if (!c) + return len; + + if (len != sizeof(*c)) + return -FDT_ERR_BADNCELLS; + + val = fdt32_to_cpu(*c); + if (val > FDT_MAX_NCELLS) + return -FDT_ERR_BADNCELLS; + + return (int)val; +} + +int fdt_address_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#address-cells"); + if (val == 0) + return -FDT_ERR_BADNCELLS; + if (val == -FDT_ERR_NOTFOUND) + return 2; + return val; +} + +int fdt_size_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#size-cells"); + if (val == -FDT_ERR_NOTFOUND) + return 1; + return val; +} + +/* This function assumes that [address|size]_cells is 1 or 2 */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size) +{ + int addr_cells, size_cells, ret; + uint8_t data[sizeof(fdt64_t) * 2], *prop; + + ret = fdt_address_cells(fdt, parent); + if (ret < 0) + return ret; + addr_cells = ret; + + ret = fdt_size_cells(fdt, parent); + if (ret < 0) + return ret; + size_cells = ret; + + /* check validity of address */ + prop = data; + if (addr_cells == 1) { + if ((addr > UINT32_MAX) || ((UINT32_MAX + 1 - addr) < size)) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)addr); + } else if (addr_cells == 2) { + fdt64_st(prop, addr); + } else { + return -FDT_ERR_BADNCELLS; + } + + /* check validity of size */ + prop += addr_cells * sizeof(fdt32_t); + if (size_cells == 1) { + if (size > 
UINT32_MAX) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)size); + } else if (size_cells == 2) { + fdt64_st(prop, size); + } else { + return -FDT_ERR_BADNCELLS; + } + + return fdt_appendprop(fdt, nodeoffset, name, data, + (addr_cells + size_cells) * sizeof(fdt32_t)); +} diff --git a/fdt/fdt_empty_tree.c b/fdt/fdt_empty_tree.c new file mode 100644 index 0000000000..49d54d44b8 --- /dev/null +++ b/fdt/fdt_empty_tree.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2012 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_create_empty_tree(void *buf, int bufsize) +{ + int err; + + err = fdt_create(buf, bufsize); + if (err) + return err; + + err = fdt_finish_reservemap(buf); + if (err) + return err; + + err = fdt_begin_node(buf, ""); + if (err) + return err; + + err = fdt_end_node(buf); + if (err) + return err; + + err = fdt_finish(buf); + if (err) + return err; + + return fdt_open_into(buf, buf, bufsize); +} diff --git a/fdt/fdt_overlay.c b/fdt/fdt_overlay.c new file mode 100644 index 0000000000..be71873366 --- /dev/null +++ b/fdt/fdt_overlay.c @@ -0,0 +1,881 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2016 Free Electrons + * Copyright (C) 2016 NextThing Co. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +/** + * overlay_get_target_phandle - retrieves the target phandle of a fragment + * @fdto: pointer to the device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * + * overlay_get_target_phandle() retrieves the target phandle of an + * overlay fragment when that fragment uses a phandle (target + * property) instead of a path (target-path property). 
+ * + * returns: + * the phandle pointed to by the target property + * 0, if the phandle was not found + * (uint32_t)-1, if the phandle was malformed + */ +static uint32_t overlay_get_target_phandle(const void *fdto, int fragment) +{ + const fdt32_t *val; + int len; + + val = fdt_getprop(fdto, fragment, "target", &len); + if (!val) + return 0; + + if ((len != sizeof(*val)) || (fdt32_to_cpu(*val) == (uint32_t)-1)) + return (uint32_t)-1; + + return fdt32_to_cpu(*val); +} + +/** + * overlay_get_target - retrieves the offset of a fragment's target + * @fdt: Base device tree blob + * @fdto: Device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * @pathp: pointer which receives the path of the target (or NULL) + * + * overlay_get_target() retrieves the target offset in the base + * device tree of a fragment, no matter how the actual targeting is + * done (through a phandle or a path) + * + * returns: + * the targeted node offset in the base device tree + * Negative error code on error + */ +static int overlay_get_target(const void *fdt, const void *fdto, + int fragment, char const **pathp) +{ + uint32_t phandle; + const char *path = NULL; + int path_len = 0, ret; + + /* Try first to do a phandle based lookup */ + phandle = overlay_get_target_phandle(fdto, fragment); + if (phandle == (uint32_t)-1) + return -FDT_ERR_BADPHANDLE; + + /* no phandle, try path */ + if (!phandle) { + /* And then a path based lookup */ + path = fdt_getprop(fdto, fragment, "target-path", &path_len); + if (path) + ret = fdt_path_offset(fdt, path); + else + ret = path_len; + } else + ret = fdt_node_offset_by_phandle(fdt, phandle); + + /* + * If we haven't found either a target or a + * target-path property in a node that contains a + * __overlay__ subnode (we wouldn't be called + * otherwise), consider it an improperly written + * overlay + */ + if (ret < 0 && path_len == -FDT_ERR_NOTFOUND) + ret = -FDT_ERR_BADOVERLAY; + + /* return on error */ + if (ret < 0) + return ret; + + /* 
return pointer to path (if available) */ + if (pathp) + *pathp = path ? path : NULL; + + return ret; +} + +/** + * overlay_phandle_add_offset - Increases a phandle by an offset + * @fdt: Device tree blob containing the node + * @node: Offset of the node holding the property + * @name: Name of the property to modify (phandle or linux,phandle) + * @delta: offset to apply + * + * overlay_phandle_add_offset() increments a node phandle by a given + * offset. + * + * returns: + * 0 on success. + * Negative error code on error + */ +static int overlay_phandle_add_offset(void *fdt, int node, + const char *name, uint32_t delta) +{ + const fdt32_t *val; + uint32_t adj_val; + int len; + + val = fdt_getprop(fdt, node, name, &len); + if (!val) + return len; + + if (len != sizeof(*val)) + return -FDT_ERR_BADPHANDLE; + + adj_val = fdt32_to_cpu(*val); + if ((adj_val + delta) < adj_val) + return -FDT_ERR_NOPHANDLES; + + adj_val += delta; + if (adj_val == (uint32_t)-1) + return -FDT_ERR_NOPHANDLES; + + return fdt_setprop_inplace_u32(fdt, node, name, adj_val); +} + +/** + * overlay_adjust_node_phandles - Offsets the phandles of a node + * @fdto: Device tree overlay blob + * @node: Offset of the node we want to adjust + * @delta: Offset to shift the phandles by + * + * overlay_adjust_node_phandles() adds a constant to all the phandles + * of a given node. This is mainly used as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the phandles of the base device tree. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_node_phandles(void *fdto, int node, + uint32_t delta) +{ + int child; + int ret; + + ret = overlay_phandle_add_offset(fdto, node, "phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + ret = overlay_phandle_add_offset(fdto, node, "linux,phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + fdt_for_each_subnode(child, fdto, node) { + ret = overlay_adjust_node_phandles(fdto, child, delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_adjust_local_phandles - Adjust the phandles of a whole overlay + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles by + * + * overlay_adjust_local_phandles() adds a constant to all the + * phandles of an overlay. This is mainly used as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the phandles of the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_local_phandles(void *fdto, uint32_t delta) +{ + /* + * Start adjusting the phandles from the overlay root + */ + return overlay_adjust_node_phandles(fdto, 0, delta); +} + +/** + * overlay_update_local_node_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @tree_node: Node offset of the node to operate on + * @fixup_node: Node offset of the matching local fixups node + * @delta: Offset to shift the phandles by + * + * overlay_update_local_node_references() updates the phandles + * pointing to a node within the device tree overlay by adding a + * constant delta. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlay's phandles to not conflict + * with the ones from the base device tree before merging them. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_node_references(void *fdto, + int tree_node, + int fixup_node, + uint32_t delta) +{ + int fixup_prop; + int fixup_child; + int ret; + + fdt_for_each_property_offset(fixup_prop, fdto, fixup_node) { + const fdt32_t *fixup_val; + const char *tree_val; + const char *name; + int fixup_len; + int tree_len; + int i; + + fixup_val = fdt_getprop_by_offset(fdto, fixup_prop, + &name, &fixup_len); + if (!fixup_val) + return fixup_len; + + if (fixup_len % sizeof(uint32_t)) + return -FDT_ERR_BADOVERLAY; + + tree_val = fdt_getprop(fdto, tree_node, name, &tree_len); + if (!tree_val) { + if (tree_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + + return tree_len; + } + + for (i = 0; i < (fixup_len / sizeof(uint32_t)); i++) { + fdt32_t adj_val; + uint32_t poffset; + + poffset = fdt32_to_cpu(fixup_val[i]); + + /* + * phandles to fixup can be unaligned. + * + * Use a memcpy for the architectures that do + * not support unaligned accesses. 
+ */ + memcpy(&adj_val, tree_val + poffset, sizeof(adj_val)); + + adj_val = cpu_to_fdt32(fdt32_to_cpu(adj_val) + delta); + + ret = fdt_setprop_inplace_namelen_partial(fdto, + tree_node, + name, + strlen(name), + poffset, + &adj_val, + sizeof(adj_val)); + if (ret == -FDT_ERR_NOSPACE) + return -FDT_ERR_BADOVERLAY; + + if (ret) + return ret; + } + } + + fdt_for_each_subnode(fixup_child, fdto, fixup_node) { + const char *fixup_child_name = fdt_get_name(fdto, fixup_child, + NULL); + int tree_child; + + tree_child = fdt_subnode_offset(fdto, tree_node, + fixup_child_name); + if (tree_child == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (tree_child < 0) + return tree_child; + + ret = overlay_update_local_node_references(fdto, + tree_child, + fixup_child, + delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_update_local_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles of + * + * overlay_update_local_references() update all the phandles pointing + * to a node within the device tree overlay by adding a constant + * delta to not conflict with the base overlay. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlays phandles to not conflict + * with the ones from the base device tree before merging them. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_references(void *fdto, uint32_t delta) +{ + int fixups; + + fixups = fdt_path_offset(fdto, "/__local_fixups__"); + if (fixups < 0) { + /* There's no local phandles to adjust, bail out */ + if (fixups == -FDT_ERR_NOTFOUND) + return 0; + + return fixups; + } + + /* + * Update our local references from the root of the tree + */ + return overlay_update_local_node_references(fdto, 0, fixups, + delta); +} + +/** + * overlay_fixup_one_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @path: Path to a node holding a phandle in the overlay + * @path_len: number of path characters to consider + * @name: Name of the property holding the phandle reference in the overlay + * @name_len: number of name characters to consider + * @poffset: Offset within the overlay property where the phandle is stored + * @label: Label of the node referenced by the phandle + * + * overlay_fixup_one_phandle() resolves an overlay phandle pointing to + * a node in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_one_phandle(void *fdt, void *fdto, + int symbols_off, + const char *path, uint32_t path_len, + const char *name, uint32_t name_len, + int poffset, const char *label) +{ + const char *symbol_path; + uint32_t phandle; + fdt32_t phandle_prop; + int symbol_off, fixup_off; + int prop_len; + + if (symbols_off < 0) + return symbols_off; + + symbol_path = fdt_getprop(fdt, symbols_off, label, + &prop_len); + if (!symbol_path) + return prop_len; + + symbol_off = fdt_path_offset(fdt, symbol_path); + if (symbol_off < 0) + return symbol_off; + + phandle = fdt_get_phandle(fdt, symbol_off); + if (!phandle) + return -FDT_ERR_NOTFOUND; + + fixup_off = fdt_path_offset_namelen(fdto, path, path_len); + if (fixup_off == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (fixup_off < 0) + return fixup_off; + + phandle_prop = cpu_to_fdt32(phandle); + return fdt_setprop_inplace_namelen_partial(fdto, fixup_off, + name, name_len, poffset, + &phandle_prop, + sizeof(phandle_prop)); +}; + +/** + * overlay_fixup_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @property: Property offset in the overlay holding the list of fixups + * + * overlay_fixup_phandle() resolves all the overlay phandles pointed + * to in a __fixups__ property, and updates them to match the phandles + * in use in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
/**
 * overlay_fixup_phandle - Set an overlay phandle to the base one
 * @fdt: Base Device Tree blob
 * @fdto: Device tree overlay blob
 * @symbols_off: Node offset of the symbols node in the base device tree
 * @property: Property offset in the overlay holding the list of fixups
 *
 * overlay_fixup_phandle() resolves all the overlay phandles pointed
 * to in a __fixups__ property, and updates them to match the phandles
 * in use in the base device tree.
 *
 * The property name is the label to resolve. The property value is a
 * sequence of NUL-terminated entries, each parsed here as
 * "path:name:offset" with a decimal offset; every entry is forwarded to
 * overlay_fixup_one_phandle().
 *
 * returns:
 *      0 on success
 *      -FDT_ERR_BADOVERLAY if an entry is malformed
 *      Negative error code on failure
 */
static int overlay_fixup_phandle(void *fdt, void *fdto, int symbols_off,
				 int property)
{
	const char *value;
	const char *label;
	int len;

	value = fdt_getprop_by_offset(fdto, property,
				      &label, &len);
	if (!value) {
		/* The offset came from the property iterator, so it must exist */
		if (len == -FDT_ERR_NOTFOUND)
			return -FDT_ERR_INTERNAL;

		return len;
	}

	do {
		const char *path, *name, *fixup_end;
		const char *fixup_str = value;
		uint32_t path_len, name_len;
		uint32_t fixup_len;
		char *sep, *endptr;
		int poffset, ret;

		/* Each entry must be terminated inside the remaining bytes */
		fixup_end = memchr(value, '\0', len);
		if (!fixup_end)
			return -FDT_ERR_BADOVERLAY;
		fixup_len = fixup_end - fixup_str;

		/* Advance the cursor past this entry and its terminator */
		len -= fixup_len + 1;
		value += fixup_len + 1;

		/* First field: node path, up to the first ':' */
		path = fixup_str;
		sep = memchr(fixup_str, ':', fixup_len);
		if (!sep || *sep != ':')
			return -FDT_ERR_BADOVERLAY;

		path_len = sep - path;
		/* Nothing follows the first ':' */
		if (path_len == (fixup_len - 1))
			return -FDT_ERR_BADOVERLAY;

		/* Second field: property name, up to the next ':' */
		fixup_len -= path_len + 1;
		name = sep + 1;
		sep = memchr(name, ':', fixup_len);
		if (!sep || *sep != ':')
			return -FDT_ERR_BADOVERLAY;

		name_len = sep - name;
		if (!name_len)
			return -FDT_ERR_BADOVERLAY;

		/*
		 * Third field: decimal offset; it must be non-empty and
		 * run up to the entry's terminator.
		 */
		poffset = strtoul(sep + 1, &endptr, 10);
		if ((*endptr != '\0') || (endptr <= (sep + 1)))
			return -FDT_ERR_BADOVERLAY;

		ret = overlay_fixup_one_phandle(fdt, fdto, symbols_off,
						path, path_len, name, name_len,
						poffset, label);
		if (ret)
			return ret;
	} while (len > 0);

	return 0;
}
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_phandles(void *fdt, void *fdto) +{ + int fixups_off, symbols_off; + int property; + + /* We can have overlays without any fixups */ + fixups_off = fdt_path_offset(fdto, "/__fixups__"); + if (fixups_off == -FDT_ERR_NOTFOUND) + return 0; /* nothing to do */ + if (fixups_off < 0) + return fixups_off; + + /* And base DTs without symbols */ + symbols_off = fdt_path_offset(fdt, "/__symbols__"); + if ((symbols_off < 0 && (symbols_off != -FDT_ERR_NOTFOUND))) + return symbols_off; + + fdt_for_each_property_offset(property, fdto, fixups_off) { + int ret; + + ret = overlay_fixup_phandle(fdt, fdto, symbols_off, property); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_apply_node - Merges a node into the base device tree + * @fdt: Base Device Tree blob + * @target: Node offset in the base device tree to apply the fragment to + * @fdto: Device tree overlay blob + * @node: Node offset in the overlay holding the changes to merge + * + * overlay_apply_node() merges a node into a target base device tree + * node pointed. + * + * This is part of the final step in the device tree overlay + * application process, when all the phandles have been adjusted and + * resolved and you just have to merge overlay into the base device + * tree. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_apply_node(void *fdt, int target, + void *fdto, int node) +{ + int property; + int subnode; + + fdt_for_each_property_offset(property, fdto, node) { + const char *name; + const void *prop; + int prop_len; + int ret; + + prop = fdt_getprop_by_offset(fdto, property, &name, + &prop_len); + if (prop_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + if (prop_len < 0) + return prop_len; + + ret = fdt_setprop(fdt, target, name, prop, prop_len); + if (ret) + return ret; + } + + fdt_for_each_subnode(subnode, fdto, node) { + const char *name = fdt_get_name(fdto, subnode, NULL); + int nnode; + int ret; + + nnode = fdt_add_subnode(fdt, target, name); + if (nnode == -FDT_ERR_EXISTS) { + nnode = fdt_subnode_offset(fdt, target, name); + if (nnode == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + } + + if (nnode < 0) + return nnode; + + ret = overlay_apply_node(fdt, nnode, fdto, subnode); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_merge - Merge an overlay into its base device tree + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_merge() merges an overlay into its base device tree. + * + * This is the next to last step in the device tree overlay application + * process, when all the phandles have been adjusted and resolved and + * you just have to merge overlay into the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_merge(void *fdt, void *fdto) +{ + int fragment; + + fdt_for_each_subnode(fragment, fdto, 0) { + int overlay; + int target; + int ret; + + /* + * Each fragments will have an __overlay__ node. 
/*
 * get_path_len - Length of the full path of a node, without terminator
 * @fdt: Device tree blob
 * @nodeoffset: Offset of the node
 *
 * Climbs from @nodeoffset towards the root, adding the length of each
 * node name plus one separator. The node with an empty name ends the
 * climb; if it was reached immediately the result is 1, for "/".
 *
 * returns the length on success, a negative error code otherwise.
 */
static int get_path_len(const void *fdt, int nodeoffset)
{
	int total = 0;
	int seglen;

	FDT_RO_PROBE(fdt);

	while (1) {
		/* On failure fdt_get_name() leaves the error in seglen */
		if (!fdt_get_name(fdt, nodeoffset, &seglen))
			return seglen;

		/* an empty name marks the root: stop climbing */
		if (seglen == 0)
			break;

		nodeoffset = fdt_parent_offset(fdt, nodeoffset);
		if (nodeoffset < 0)
			return nodeoffset;

		total += seglen + 1;
	}

	/* the root alone is reported as "/" */
	return total ? total : 1;
}
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_symbol_update(void *fdt, void *fdto) +{ + int root_sym, ov_sym, prop, path_len, fragment, target; + int len, frag_name_len, ret, rel_path_len; + const char *s, *e; + const char *path; + const char *name; + const char *frag_name; + const char *rel_path; + const char *target_path; + char *buf; + void *p; + + ov_sym = fdt_subnode_offset(fdto, 0, "__symbols__"); + + /* if no overlay symbols exist no problem */ + if (ov_sym < 0) + return 0; + + root_sym = fdt_subnode_offset(fdt, 0, "__symbols__"); + + /* it no root symbols exist we should create them */ + if (root_sym == -FDT_ERR_NOTFOUND) + root_sym = fdt_add_subnode(fdt, 0, "__symbols__"); + + /* any error is fatal now */ + if (root_sym < 0) + return root_sym; + + /* iterate over each overlay symbol */ + fdt_for_each_property_offset(prop, fdto, ov_sym) { + path = fdt_getprop_by_offset(fdto, prop, &name, &path_len); + if (!path) + return path_len; + + /* verify it's a string property (terminated by a single \0) */ + if (path_len < 1 || memchr(path, '\0', path_len) != &path[path_len - 1]) + return -FDT_ERR_BADVALUE; + + /* keep end marker to avoid strlen() */ + e = path + path_len; + + if (*path != '/') + return -FDT_ERR_BADVALUE; + + /* get fragment name first */ + s = strchr(path + 1, '/'); + if (!s) { + /* Symbol refers to something that won't end + * up in the target tree */ + continue; + } + + frag_name = path + 1; + frag_name_len = s - path - 1; + + /* verify format; safe since "s" lies in \0 terminated prop */ + len = sizeof("/__overlay__/") - 1; + if ((e - s) > len && (memcmp(s, "/__overlay__/", len) == 0)) { + /* //__overlay__/ */ + rel_path = s + len; + rel_path_len = e - rel_path; + } else if ((e - s) == len + && (memcmp(s, "/__overlay__", len - 1) == 0)) { + /* //__overlay__ */ + rel_path = ""; + rel_path_len = 0; + } else { + /* Symbol refers to something that won't end + * up in the target tree */ + continue; + } 
+ + /* find the fragment index in which the symbol lies */ + ret = fdt_subnode_offset_namelen(fdto, 0, frag_name, + frag_name_len); + /* not found? */ + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + fragment = ret; + + /* an __overlay__ subnode must exist */ + ret = fdt_subnode_offset(fdto, fragment, "__overlay__"); + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + + /* get the target of the fragment */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + + /* if we have a target path use */ + if (!target_path) { + ret = get_path_len(fdt, target); + if (ret < 0) + return ret; + len = ret; + } else { + len = strlen(target_path); + } + + ret = fdt_setprop_placeholder(fdt, root_sym, name, + len + (len > 1) + rel_path_len + 1, &p); + if (ret < 0) + return ret; + + if (!target_path) { + /* again in case setprop_placeholder changed it */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + } + + buf = p; + if (len > 1) { /* target is not root */ + if (!target_path) { + ret = fdt_get_path(fdt, target, buf, len + 1); + if (ret < 0) + return ret; + } else + memcpy(buf, target_path, len + 1); + + } else + len--; + + buf[len] = '/'; + memcpy(buf + len + 1, rel_path, rel_path_len); + buf[len + 1 + rel_path_len] = '\0'; + } + + return 0; +} + +int fdt_overlay_apply(void *fdt, void *fdto) +{ + uint32_t delta; + int ret; + + FDT_RO_PROBE(fdt); + FDT_RO_PROBE(fdto); + + ret = fdt_find_max_phandle(fdt, &delta); + if (ret) + goto err; + + ret = overlay_adjust_local_phandles(fdto, delta); + if (ret) + goto err; + + ret = overlay_update_local_references(fdto, delta); + if (ret) + goto err; + + ret = overlay_fixup_phandles(fdt, fdto); + if (ret) + goto err; + + ret = overlay_merge(fdt, fdto); + if (ret) + goto err; + + ret = overlay_symbol_update(fdt, fdto); + if (ret) + goto err; + + /* + * The overlay has been damaged, erase its magic. 
/*
 * fdt_nodename_eq_ - Compare a node's name with a length-limited string
 *
 * Returns 1 when the name of the node at @offset equals the first @len
 * characters of @s, or when it equals them followed by an '@...' suffix
 * and @s itself contains no '@' within @len. Returns 0 otherwise,
 * including when the node name cannot be read.
 */
static int fdt_nodename_eq_(const void *fdt, int offset,
			    const char *s, int len)
{
	int node_len;
	const char *node_name = fdt_get_name(fdt, offset, &node_len);

	/* unreadable name, or name shorter than the requested prefix */
	if (node_name == NULL || node_len < len)
		return 0;

	if (memcmp(node_name, s, len))
		return 0;

	/* exact match */
	if (node_name[len] == '\0')
		return 1;

	/* the suffix after '@' may be omitted by the caller */
	return (memchr(s, '@', len) == NULL) && (node_name[len] == '@');
}
{ + /* missing terminating NULL */ + err = -FDT_ERR_TRUNCATED; + goto fail; + } + + if (lenp) + *lenp = n - s; + return s; + +fail: + if (lenp) + *lenp = err; + return NULL; +} + +const char *fdt_string(const void *fdt, int stroffset) +{ + return fdt_get_string(fdt, stroffset, NULL); +} + +static int fdt_string_eq_(const void *fdt, int stroffset, + const char *s, int len) +{ + int slen; + const char *p = fdt_get_string(fdt, stroffset, &slen); + + return p && (slen == len) && (memcmp(p, s, len) == 0); +} + +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max = 0; + int offset = -1; + + while (true) { + uint32_t value; + + offset = fdt_next_node(fdt, offset, NULL); + if (offset < 0) { + if (offset == -FDT_ERR_NOTFOUND) + break; + + return offset; + } + + value = fdt_get_phandle(fdt, offset); + + if (value > max) + max = value; + } + + if (phandle) + *phandle = max; + + return 0; +} + +int fdt_generate_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max; + int err; + + err = fdt_find_max_phandle(fdt, &max); + if (err < 0) + return err; + + if (max == FDT_MAX_PHANDLE) + return -FDT_ERR_NOPHANDLES; + + if (phandle) + *phandle = max + 1; + + return 0; +} + +static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n) +{ + int offset = n * sizeof(struct fdt_reserve_entry); + int absoffset = fdt_off_mem_rsvmap(fdt) + offset; + + if (absoffset < fdt_off_mem_rsvmap(fdt)) + return NULL; + if (absoffset > fdt_totalsize(fdt) - sizeof(struct fdt_reserve_entry)) + return NULL; + return fdt_mem_rsv_(fdt, n); +} + +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + const struct fdt_reserve_entry *re; + + FDT_RO_PROBE(fdt); + re = fdt_mem_rsv(fdt, n); + if (!re) + return -FDT_ERR_BADOFFSET; + + *address = fdt64_ld(&re->address); + *size = fdt64_ld(&re->size); + return 0; +} + +int fdt_num_mem_rsv(const void *fdt) +{ + int i; + const struct fdt_reserve_entry *re; + + for (i = 0; (re = fdt_mem_rsv(fdt, 
i)) != NULL; i++) { + if (fdt64_ld(&re->size) == 0) + return i; + } + return -FDT_ERR_TRUNCATED; +} + +static int nextprop_(const void *fdt, int offset) +{ + uint32_t tag; + int nextoffset; + + do { + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + if (nextoffset >= 0) + return -FDT_ERR_BADSTRUCTURE; + else + return nextoffset; + + case FDT_PROP: + return offset; + } + offset = nextoffset; + } while (tag == FDT_NOP); + + return -FDT_ERR_NOTFOUND; +} + +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_RO_PROBE(fdt); + + for (depth = 0; + (offset >= 0) && (depth >= 0); + offset = fdt_next_node(fdt, offset, &depth)) + if ((depth == 1) + && fdt_nodename_eq_(fdt, offset, name, namelen)) + return offset; + + if (depth < 0) + return -FDT_ERR_NOTFOUND; + return offset; /* error */ +} + +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ + return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) +{ + const char *end = path + namelen; + const char *p = path; + int offset = 0; + + FDT_RO_PROBE(fdt); + + /* see if we have an alias */ + if (*path != '/') { + const char *q = memchr(path, '/', end - p); + + if (!q) + q = end; + + p = fdt_get_alias_namelen(fdt, p, q - p); + if (!p) + return -FDT_ERR_BADPATH; + offset = fdt_path_offset(fdt, p); + + p = q; + } + + while (p < end) { + const char *q; + + while (*p == '/') { + p++; + if (p == end) + return offset; + } + q = memchr(p, '/', end - p); + if (! 
q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + +int fdt_path_offset(const void *fdt, const char *path) +{ + return fdt_path_offset_namelen(fdt, path, strlen(path)); +} + +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset); + const char *nameptr; + int err; + + if (((err = fdt_ro_probe_(fdt)) < 0) + || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0)) + goto fail; + + nameptr = nh->name; + + if (fdt_version(fdt) < 0x10) { + /* + * For old FDT versions, match the naming conventions of V16: + * give only the leaf name (after all /). The actual tree + * contents are loosely checked. + */ + const char *leaf; + leaf = strrchr(nameptr, '/'); + if (leaf == NULL) { + err = -FDT_ERR_BADSTRUCTURE; + goto fail; + } + nameptr = leaf+1; + } + + if (len) + *len = strlen(nameptr); + + return nameptr; + + fail: + if (len) + *len = err; + return NULL; +} + +int fdt_first_property_offset(const void *fdt, int nodeoffset) +{ + int offset; + + if ((offset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +int fdt_next_property_offset(const void *fdt, int offset) +{ + if ((offset = fdt_check_prop_offset_(fdt, offset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, + int offset, + int *lenp) +{ + int err; + const struct fdt_property *prop; + + if ((err = fdt_check_prop_offset_(fdt, offset)) < 0) { + if (lenp) + *lenp = err; + return NULL; + } + + prop = fdt_offset_ptr_(fdt, offset); + + if (lenp) + *lenp = fdt32_ld(&prop->len); + + return prop; +} + +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. 
fdt_getprop_*() will, however. */ + + if (fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_by_offset_(fdt, offset, lenp); +} + +static const struct fdt_property *fdt_get_property_namelen_(const void *fdt, + int offset, + const char *name, + int namelen, + int *lenp, + int *poffset) +{ + for (offset = fdt_first_property_offset(fdt, offset); + (offset >= 0); + (offset = fdt_next_property_offset(fdt, offset))) { + const struct fdt_property *prop; + + if (!(prop = fdt_get_property_by_offset_(fdt, offset, lenp))) { + offset = -FDT_ERR_INTERNAL; + break; + } + if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff), + name, namelen)) { + if (poffset) + *poffset = offset; + return prop; + } + } + + if (lenp) + *lenp = offset; + return NULL; +} + + +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int offset, + const char *name, + int namelen, int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. fdt_getprop_*() will, however. 
*/ + if (fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_namelen_(fdt, offset, name, namelen, lenp, + NULL); +} + + +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + return fdt_get_property_namelen(fdt, nodeoffset, name, + strlen(name), lenp); +} + +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp) +{ + int poffset; + const struct fdt_property *prop; + + prop = fdt_get_property_namelen_(fdt, nodeoffset, name, namelen, lenp, + &poffset); + if (!prop) + return NULL; + + /* Handle realignment */ + if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 && + fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset_(fdt, offset, lenp); + if (!prop) + return NULL; + if (namep) { + const char *name; + int namelen; + name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff), + &namelen); + if (!name) { + if (lenp) + *lenp = namelen; + return NULL; + } + *namep = name; + } + + /* Handle realignment */ + if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 && + fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp); +} + +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const fdt32_t *php; + int len; + + /* FIXME: This is a bit sub-optimal, since we potentially scan + * over all the properties twice. 
/*
 * fdt_get_path - Build the full path of a node
 * @fdt: Device tree blob
 * @nodeoffset: Offset of the node whose path is wanted
 * @buf: Buffer receiving the NUL-terminated path
 * @buflen: Size of @buf in bytes
 *
 * Walks the tree from offset 0 up to @nodeoffset, keeping in @buf the
 * '/'-terminated names of the nodes on the current branch.
 *
 * returns:
 *      0 on success
 *      -FDT_ERR_NOSPACE if @buf is too small (it must hold at least 2 bytes)
 *      -FDT_ERR_BADOFFSET if the walk ends without visiting @nodeoffset
 *      Another negative error code on failure
 */
int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen)
{
	/* pdepth: number of path components held in buf; p: write position */
	int pdepth = 0, p = 0;
	int offset, depth, namelen;
	const char *name;

	FDT_RO_PROBE(fdt);

	if (buflen < 2)
		return -FDT_ERR_NOSPACE;

	for (offset = 0, depth = 0;
	     (offset >= 0) && (offset <= nodeoffset);
	     offset = fdt_next_node(fdt, offset, &depth)) {
		/* Drop stored components deeper than the current node */
		while (pdepth > depth) {
			do {
				p--;
			} while (buf[p-1] != '/');
			pdepth--;
		}

		/*
		 * Append this node's name only when every ancestor is
		 * already stored; a name that does not fit is skipped,
		 * which leaves pdepth behind depth.
		 */
		if (pdepth >= depth) {
			name = fdt_get_name(fdt, offset, &namelen);
			if (!name)
				return namelen;
			if ((p + namelen + 1) <= buflen) {
				memcpy(buf + p, name, namelen);
				p += namelen;
				buf[p++] = '/';
				pdepth++;
			}
		}

		if (offset == nodeoffset) {
			/* Some component on the way did not fit in buf */
			if (pdepth < (depth + 1))
				return -FDT_ERR_NOSPACE;

			if (p > 1) /* special case so that root path is "/", not "" */
				p--;
			buf[p] = '\0';
			return 0;
		}
	}

	/* The walk ended, or moved past nodeoffset, without hitting it */
	if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0))
		return -FDT_ERR_BADOFFSET;
	else if (offset == -FDT_ERR_BADOFFSET)
		return -FDT_ERR_BADSTRUCTURE;

	return offset; /* error from fdt_next_node() */
}
+ (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (err < 0) ? err : -FDT_ERR_INTERNAL; + return nodedepth; +} + +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
/*
 * fdt_stringlist_contains - Check a string list for a given string
 * @strlist: Sequence of NUL-terminated strings
 * @listlen: Length of @strlist in bytes
 * @str: NUL-terminated string to look for
 *
 * An entry only matches when it is identical to @str and its
 * terminating NUL lies inside the first @listlen bytes, so no byte
 * beyond @listlen is ever examined.
 *
 * returns 1 if @str is one of the entries, 0 otherwise (including when
 * the list is malformed).
 */
int fdt_stringlist_contains(const char *strlist, int listlen, const char *str)
{
	int len = strlen(str);
	const char *p;

	/*
	 * A match needs len characters plus the terminator, i.e. at least
	 * len + 1 bytes left in the list.
	 */
	while (listlen > len) {
		if (memcmp(str, strlist, len+1) == 0)
			return 1;
		p = (const char *)memchr(strlist, '\0', listlen);
		if (!p)
			return 0; /* malformed strlist.. */
		/* skip this entry and its terminator */
		listlen -= (p-strlist) + 1;
		strlist = p + 1;
	}
	return 0;
}
*/ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + list += length; + count++; + } + + return count; +} + +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string) +{ + int length, len, idx = 0; + const char *list, *end; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return length; + + len = strlen(string) + 1; + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. */ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + if (length == len && memcmp(list, string, length) == 0) + return idx; + + list += length; + idx++; + } + + return -FDT_ERR_NOTFOUND; +} + +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int idx, + int *lenp) +{ + const char *list, *end; + int length; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) { + if (lenp) + *lenp = length; + + return NULL; + } + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
*/ + if (list + length > end) { + if (lenp) + *lenp = -FDT_ERR_BADVALUE; + + return NULL; + } + + if (idx == 0) { + if (lenp) + *lenp = length - 1; + + return list; + } + + list += length; + idx--; + } + + if (lenp) + *lenp = -FDT_ERR_NOTFOUND; + + return NULL; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + + return !fdt_stringlist_contains(prop, len, compatible); +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. */ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_check_full(const void *fdt, size_t bufsize) +{ + int err; + int num_memrsv; + int offset, nextoffset = 0; + uint32_t tag; + unsigned depth = 0; + const void *prop; + const char *propname; + + if (bufsize < FDT_V1_SIZE) + return -FDT_ERR_TRUNCATED; + err = fdt_check_header(fdt); + if (err != 0) + return err; + if (bufsize < fdt_totalsize(fdt)) + return -FDT_ERR_TRUNCATED; + + num_memrsv = fdt_num_mem_rsv(fdt); + if (num_memrsv < 0) + return num_memrsv; + + while (1) { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + if (nextoffset < 0) + return nextoffset; + + switch (tag) { + case FDT_NOP: + break; + + case FDT_END: + if (depth 
!= 0) + return -FDT_ERR_BADSTRUCTURE; + return 0; + + case FDT_BEGIN_NODE: + depth++; + if (depth > INT_MAX) + return -FDT_ERR_BADSTRUCTURE; + break; + + case FDT_END_NODE: + if (depth == 0) + return -FDT_ERR_BADSTRUCTURE; + depth--; + break; + + case FDT_PROP: + prop = fdt_getprop_by_offset(fdt, offset, &propname, + &err); + if (!prop) + return err; + break; + + default: + return -FDT_ERR_INTERNAL; + } + } +} diff --git a/fdt/fdt_rw.c b/fdt/fdt_rw.c new file mode 100644 index 0000000000..8795947c00 --- /dev/null +++ b/fdt/fdt_rw.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_blocks_misordered_(const void *fdt, + int mem_rsv_size, int struct_size) +{ + return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8)) + || (fdt_off_dt_struct(fdt) < + (fdt_off_mem_rsvmap(fdt) + mem_rsv_size)) + || (fdt_off_dt_strings(fdt) < + (fdt_off_dt_struct(fdt) + struct_size)) + || (fdt_totalsize(fdt) < + (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt))); +} + +static int fdt_rw_probe_(void *fdt) +{ + FDT_RO_PROBE(fdt); + + if (fdt_version(fdt) < 17) + return -FDT_ERR_BADVERSION; + if (fdt_blocks_misordered_(fdt, sizeof(struct fdt_reserve_entry), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_BADLAYOUT; + if (fdt_version(fdt) > 17) + fdt_set_version(fdt, 17); + + return 0; +} + +#define FDT_RW_PROBE(fdt) \ + { \ + int err_; \ + if ((err_ = fdt_rw_probe_(fdt)) != 0) \ + return err_; \ + } + +static inline int fdt_data_size_(void *fdt) +{ + return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); +} + +static int fdt_splice_(void *fdt, void *splicepoint, int oldlen, int newlen) +{ + char *p = splicepoint; + char *end = (char *)fdt + fdt_data_size_(fdt); + + if (((p + oldlen) < p) || ((p + oldlen) > end)) + return 
-FDT_ERR_BADOFFSET; + if ((p < (char *)fdt) || ((end - oldlen + newlen) < (char *)fdt)) + return -FDT_ERR_BADOFFSET; + if ((end - oldlen + newlen) > ((char *)fdt + fdt_totalsize(fdt))) + return -FDT_ERR_NOSPACE; + memmove(p + newlen, p + oldlen, end - p - oldlen); + return 0; +} + +static int fdt_splice_mem_rsv_(void *fdt, struct fdt_reserve_entry *p, + int oldn, int newn) +{ + int delta = (newn - oldn) * sizeof(*p); + int err; + err = fdt_splice_(fdt, p, oldn * sizeof(*p), newn * sizeof(*p)); + if (err) + return err; + fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int fdt_splice_struct_(void *fdt, void *p, + int oldlen, int newlen) +{ + int delta = newlen - oldlen; + int err; + + if ((err = fdt_splice_(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int newlen = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) - newlen); +} + +static int fdt_splice_string_(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = fdt_splice_(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + *allocated = 0; + + p = fdt_find_string_(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = fdt_splice_string_(fdt, len); + if (err) + return err; + + *allocated = 1; + + memcpy(new, 
s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_PROBE(fdt); + + re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt)); + err = fdt_splice_mem_rsv_(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n); + + FDT_RW_PROBE(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + return fdt_splice_mem_rsv_(fdt, re, 1, 0); +} + +static int fdt_resize_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (!*prop) + return oldlen; + + if ((err = fdt_splice_struct_(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int fdt_add_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + int allocated; + + if ((nextoffset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = fdt_find_add_string_(fdt, name, &allocated); + if (namestroff < 0) + return namestroff; + + *prop = fdt_offset_ptr_w_(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = fdt_splice_struct_(fdt, *prop, 0, proplen); + if (err) { + if (allocated) + fdt_del_last_string_(fdt, name); + return err; + } + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_PROBE(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, 
&oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = fdt_splice_struct_(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data) +{ + struct fdt_property *prop; + int err; + + FDT_RW_PROBE(fdt); + + err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + *prop_data = prop->data; + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + void *prop_data; + int err; + + err = fdt_setprop_placeholder(fdt, nodeoffset, name, len, &prop_data); + if (err) + return err; + + if (len) + memcpy(prop_data, val, len); + return 0; +} + +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err, oldlen, newlen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (prop) { + newlen = len + oldlen; + err = fdt_splice_struct_(fdt, prop->data, + FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(newlen)); + if (err) + return err; + prop->len = cpu_to_fdt32(newlen); + memcpy(prop->data + oldlen, val, len); + } else { + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + memcpy(prop->data, val, len); + } + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return fdt_splice_struct_(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; 
+ int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + fdt32_t *endtag; + + FDT_RW_PROBE(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = fdt_offset_ptr_w_(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = fdt_splice_struct_(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) +{ + int endoffset; + + FDT_RW_PROBE(fdt); + + endoffset = fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return fdt_splice_struct_(fdt, fdt_offset_ptr_w_(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void fdt_packblocks_(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + 
memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_RO_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + if (struct_size < 0) + return struct_size; + } + + if (!fdt_blocks_misordered_(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if (bufsize < newsize) + return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer */ + tmp = buf; + /* But if that overlaps with the old tree... 
*/ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, fdt_data_size_(fdt)); + + return 0; +} diff --git a/fdt/fdt_strerror.c b/fdt/fdt_strerror.c new file mode 100644 index 0000000000..768db66ead --- /dev/null +++ b/fdt/fdt_strerror.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADPHANDLE), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), + FDT_ERRTABENT(FDT_ERR_INTERNAL), + FDT_ERRTABENT(FDT_ERR_BADNCELLS), + FDT_ERRTABENT(FDT_ERR_BADVALUE), + FDT_ERRTABENT(FDT_ERR_BADOVERLAY), + FDT_ERRTABENT(FDT_ERR_NOPHANDLES), + FDT_ERRTABENT(FDT_ERR_BADFLAGS), +}; +#define FDT_ERRTABSIZE (sizeof(fdt_errtable) / sizeof(fdt_errtable[0])) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return ""; + else if (errval == 0) + return ""; + else if (errval > -FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return ""; +} diff --git a/fdt/fdt_sw.c b/fdt/fdt_sw.c new file mode 100644 index 0000000000..76bea22f73 --- /dev/null +++ b/fdt/fdt_sw.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_sw_probe_(void *fdt) +{ + if (fdt_magic(fdt) == FDT_MAGIC) + return -FDT_ERR_BADSTATE; + else if (fdt_magic(fdt) != FDT_SW_MAGIC) + return -FDT_ERR_BADMAGIC; + return 0; +} + +#define FDT_SW_PROBE(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_(fdt)) != 0) \ + return err; \ + } + +/* 'memrsv' state: Initial state after fdt_create() + * + * Allowed functions: + * fdt_add_reservemap_entry() + * fdt_finish_reservemap() [moves to 'struct' state] + */ +static int fdt_sw_probe_memrsv_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (fdt_off_dt_strings(fdt) != 0) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_MEMRSV(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \ + return err; \ + } + +/* 'struct' state: Enter this state after fdt_finish_reservemap() + * + * Allowed functions: + * fdt_begin_node() + * fdt_end_node() + * fdt_property*() + * fdt_finish() [moves to 'complete' state] + */ +static int fdt_sw_probe_struct_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (fdt_off_dt_strings(fdt) != fdt_totalsize(fdt)) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_STRUCT(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_struct_(fdt)) != 0) \ + return err; \ + } + +static inline uint32_t sw_flags(void *fdt) +{ + /* assert: (fdt_magic(fdt) == FDT_SW_MAGIC) */ + return fdt_last_comp_version(fdt); +} + +/* 'complete' state: Enter this state after fdt_finish() + * + * Allowed functions: none + */ + +static void *fdt_grab_space_(void *fdt, size_t len) +{ + int offset = fdt_size_dt_struct(fdt); + int spaceleft; + + spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt) + - fdt_size_dt_strings(fdt); + + if ((offset + len < offset) || (offset + len > spaceleft)) + return NULL; + + fdt_set_size_dt_struct(fdt, offset + len); + return fdt_offset_ptr_w_(fdt,
offset); +} + +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags) +{ + const size_t hdrsize = FDT_ALIGN(sizeof(struct fdt_header), + sizeof(struct fdt_reserve_entry)); + void *fdt = buf; + + if (bufsize < hdrsize) + return -FDT_ERR_NOSPACE; + + if (flags & ~FDT_CREATE_FLAGS_ALL) + return -FDT_ERR_BADFLAGS; + + memset(buf, 0, bufsize); + + /* + * magic and last_comp_version keep intermediate state during the fdt + * creation process, which is replaced with the proper FDT format by + * fdt_finish(). + * + * flags should be accessed with sw_flags(). + */ + fdt_set_magic(fdt, FDT_SW_MAGIC); + fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION); + fdt_set_last_comp_version(fdt, flags); + + fdt_set_totalsize(fdt, bufsize); + + fdt_set_off_mem_rsvmap(fdt, hdrsize); + fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt)); + fdt_set_off_dt_strings(fdt, 0); + + return 0; +} + +int fdt_create(void *buf, int bufsize) +{ + return fdt_create_with_flags(buf, bufsize, 0); +} + +int fdt_resize(void *fdt, void *buf, int bufsize) +{ + size_t headsize, tailsize; + char *oldtail, *newtail; + + FDT_SW_PROBE(fdt); + + headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + tailsize = fdt_size_dt_strings(fdt); + + if ((headsize + tailsize) > fdt_totalsize(fdt)) + return -FDT_ERR_INTERNAL; + + if ((headsize + tailsize) > bufsize) + return -FDT_ERR_NOSPACE; + + oldtail = (char *)fdt + fdt_totalsize(fdt) - tailsize; + newtail = (char *)buf + bufsize - tailsize; + + /* Two cases to avoid clobbering data if the old and new + * buffers partially overlap */ + if (buf <= fdt) { + memmove(buf, fdt, headsize); + memmove(newtail, oldtail, tailsize); + } else { + memmove(newtail, oldtail, tailsize); + memmove(buf, fdt, headsize); + } + + fdt_set_totalsize(buf, bufsize); + if (fdt_off_dt_strings(buf)) + fdt_set_off_dt_strings(buf, bufsize); + + return 0; +} + +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size) +{ + struct fdt_reserve_entry *re; + int offset; + + 
FDT_SW_PROBE_MEMRSV(fdt); + + offset = fdt_off_dt_struct(fdt); + if ((offset + sizeof(*re)) > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + + re = (struct fdt_reserve_entry *)((char *)fdt + offset); + re->address = cpu_to_fdt64(addr); + re->size = cpu_to_fdt64(size); + + fdt_set_off_dt_struct(fdt, offset + sizeof(*re)); + + return 0; +} + +int fdt_finish_reservemap(void *fdt) +{ + int err = fdt_add_reservemap_entry(fdt, 0, 0); + + if (err) + return err; + + fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt)); + return 0; +} + +int fdt_begin_node(void *fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen; + + FDT_SW_PROBE_STRUCT(fdt); + + namelen = strlen(name) + 1; + nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + fdt32_t *en; + + FDT_SW_PROBE_STRUCT(fdt); + + en = fdt_grab_space_(fdt, FDT_TAGSIZE); + if (! 
en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int fdt_add_string_(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + int struct_top, offset; + + offset = -strtabsize - len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) + offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab + offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return offset; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, strtabsize - len); +} + +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + int strtabsize = fdt_size_dt_strings(fdt); + const char *p; + + *allocated = 0; + + p = fdt_find_string_(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + *allocated = 1; + + return fdt_add_string_(fdt, s); +} + +int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp) +{ + struct fdt_property *prop; + int nameoff; + int allocated; + + FDT_SW_PROBE_STRUCT(fdt); + + /* String de-duplication can be slow, _NO_NAME_DEDUP skips it */ + if (sw_flags(fdt) & FDT_CREATE_FLAG_NO_NAME_DEDUP) { + allocated = 1; + nameoff = fdt_add_string_(fdt, name); + } else { + nameoff = fdt_find_add_string_(fdt, name, &allocated); + } + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = fdt_grab_space_(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! 
prop) { + if (allocated) + fdt_del_last_string_(fdt, name); + return -FDT_ERR_NOSPACE; + } + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + *valp = prop->data; + return 0; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + void *ptr; + int ret; + + ret = fdt_property_placeholder(fdt, name, len, &ptr); + if (ret) + return ret; + memcpy(ptr, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + fdt32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_PROBE_STRUCT(fdt); + + /* Add terminator */ + end = fdt_grab_space_(fdt, sizeof(*end)); + if (! end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + fdt_offset_ptr_w_(fdt, offset); + int nameoff; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + if (nextoffset < 0) + return nextoffset; + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + + /* And fix up fields that were keeping intermediate state. 
*/ + fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION); + fdt_set_magic(fdt, FDT_MAGIC); + + return 0; +} diff --git a/fdt/fdt_wip.c b/fdt/fdt_wip.c new file mode 100644 index 0000000000..f64139e0b3 --- /dev/null +++ b/fdt/fdt_wip.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen, + &proplen); + if (!propval) + return proplen; + + if (proplen < (len + idx)) + return -FDT_ERR_NOSPACE; + + memcpy((char *)propval + idx, val, len); + return 0; +} + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + const void *propval; + int proplen; + + propval = fdt_getprop(fdt, nodeoffset, name, &proplen); + if (!propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name, + strlen(name), 0, + val, len); +} + +static void fdt_nop_region_(void *start, int len) +{ + fdt32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + fdt_nop_region_(prop, len + sizeof(*prop)); + + return 0; +} + +int fdt_node_end_offset_(void *fdt, int offset) +{ + int depth = 0; + + while ((offset >= 0) && (depth >= 0)) + offset = fdt_next_node(fdt, offset, &depth); + + return offset; +} + +int fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + 
endoffset = fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + fdt_nop_region_(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/fdt/libfdt.h b/fdt/libfdt.h new file mode 100644 index 0000000000..d2356cce43 --- /dev/null +++ b/fdt/libfdt.h @@ -0,0 +1,2077 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_H +#define LIBFDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ + +#include +#include + +#define FDT_FIRST_SUPPORTED_VERSION 0x02 +#define FDT_LAST_SUPPORTED_VERSION 0x11 + +/* Error codes: informative error codes */ +#define FDT_ERR_NOTFOUND 1 + /* FDT_ERR_NOTFOUND: The requested node or property does not exist */ +#define FDT_ERR_EXISTS 2 + /* FDT_ERR_EXISTS: Attempted to create a node or property which + * already exists */ +#define FDT_ERR_NOSPACE 3 + /* FDT_ERR_NOSPACE: Operation needed to expand the device + * tree, but its buffer did not have sufficient space to + * contain the expanded tree. Use fdt_open_into() to move the + * device tree to a buffer with more space. */ + +/* Error codes: codes for bad parameters */ +#define FDT_ERR_BADOFFSET 4 + /* FDT_ERR_BADOFFSET: Function was passed a structure block + * offset which is out-of-bounds, or which points to an + * unsuitable part of the structure for the operation. */ +#define FDT_ERR_BADPATH 5 + /* FDT_ERR_BADPATH: Function was passed a badly formatted path + * (e.g. missing a leading / for a function which requires an + * absolute path) */ +#define FDT_ERR_BADPHANDLE 6 + /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle. + * This can be caused either by an invalid phandle property + * length, or the phandle value was either 0 or -1, which are + * not permitted. 
*/ +#define FDT_ERR_BADSTATE 7 + /* FDT_ERR_BADSTATE: Function was passed an incomplete device + * tree created by the sequential-write functions, which is + * not sufficiently complete for the requested operation. */ + +/* Error codes: codes for bad device tree blobs */ +#define FDT_ERR_TRUNCATED 8 + /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly + * terminated (overflows, goes outside allowed bounds, or + * isn't properly terminated). */ +#define FDT_ERR_BADMAGIC 9 + /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a + * device tree at all - it is missing the flattened device + * tree magic number. */ +#define FDT_ERR_BADVERSION 10 + /* FDT_ERR_BADVERSION: Given device tree has a version which + * can't be handled by the requested operation. For + * read-write functions, this may mean that fdt_open_into() is + * required to convert the tree to the expected version. */ +#define FDT_ERR_BADSTRUCTURE 11 + /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt + * structure block or other serious error (e.g. misnested + * nodes, or subnodes preceding properties). */ +#define FDT_ERR_BADLAYOUT 12 + /* FDT_ERR_BADLAYOUT: For read-write functions, the given + * device tree has its sub-blocks in an order that the + * function can't handle (memory reserve map, then structure, + * then strings). Use fdt_open_into() to reorganize the tree + * into a form suitable for the read-write operations. */ + +/* "Can't happen" error indicating a bug in libfdt */ +#define FDT_ERR_INTERNAL 13 + /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion. + * Should never be returned, if it is, it indicates a bug in + * libfdt itself. */ + +/* Errors in device tree content */ +#define FDT_ERR_BADNCELLS 14 + /* FDT_ERR_BADNCELLS: Device tree has a #address-cells, #size-cells + * or similar property with a bad format or value */ + +#define FDT_ERR_BADVALUE 15 + /* FDT_ERR_BADVALUE: Device tree has a property with an unexpected + * value.
For example: a property expected to contain a string list + * is not NUL-terminated within the length of its value. */ + +#define FDT_ERR_BADOVERLAY 16 + /* FDT_ERR_BADOVERLAY: The device tree overlay, while + * correctly structured, cannot be applied due to some + * unexpected or missing value, property or node. */ + +#define FDT_ERR_NOPHANDLES 17 + /* FDT_ERR_NOPHANDLES: The device tree doesn't have any + * phandle available anymore without causing an overflow */ + +#define FDT_ERR_BADFLAGS 18 + /* FDT_ERR_BADFLAGS: The function was passed a flags field that + * contains invalid flags or an invalid combination of flags. */ + +#define FDT_ERR_MAX 18 + +/* constants */ +#define FDT_MAX_PHANDLE 0xfffffffe + /* Valid values for phandles range from 1 to 2^32-2. */ + +#ifdef __cplusplus +extern "C" { +#endif +/**********************************************************************/ +/* Low-level functions (you probably don't need these) */ +/**********************************************************************/ + +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen); +#endif +static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) +{ + return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); +} + +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); + +/* + * Alignment helpers: + * These helpers access words from a device tree blob. 
They're + * built to work even with unaligned pointers on platforms (like + * ARM) that don't like unaligned loads and stores + */ + +static inline uint32_t fdt32_ld(const fdt32_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint32_t)bp[0] << 24) + | ((uint32_t)bp[1] << 16) + | ((uint32_t)bp[2] << 8) + | bp[3]; +} + +static inline void fdt32_st(void *property, uint32_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 24; + bp[1] = (value >> 16) & 0xff; + bp[2] = (value >> 8) & 0xff; + bp[3] = value & 0xff; +} + +static inline uint64_t fdt64_ld(const fdt64_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint64_t)bp[0] << 56) + | ((uint64_t)bp[1] << 48) + | ((uint64_t)bp[2] << 40) + | ((uint64_t)bp[3] << 32) + | ((uint64_t)bp[4] << 24) + | ((uint64_t)bp[5] << 16) + | ((uint64_t)bp[6] << 8) + | bp[7]; +} + +static inline void fdt64_st(void *property, uint64_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 56; + bp[1] = (value >> 48) & 0xff; + bp[2] = (value >> 40) & 0xff; + bp[3] = (value >> 32) & 0xff; + bp[4] = (value >> 24) & 0xff; + bp[5] = (value >> 16) & 0xff; + bp[6] = (value >> 8) & 0xff; + bp[7] = value & 0xff; +} + +/**********************************************************************/ +/* Traversal functions */ +/**********************************************************************/ + +int fdt_next_node(const void *fdt, int offset, int *depth); + +/** + * fdt_first_subnode() - get offset of first direct subnode + * + * @fdt: FDT blob + * @offset: Offset of node to check + * @return offset of first subnode, or -FDT_ERR_NOTFOUND if there is none + */ +int fdt_first_subnode(const void *fdt, int offset); + +/** + * fdt_next_subnode() - get offset of next direct subnode + * + * After first calling fdt_first_subnode(), call this function repeatedly to + * get direct subnodes of a parent node.
+ * + * @fdt: FDT blob + * @offset: Offset of previous subnode + * @return offset of next subnode, or -FDT_ERR_NOTFOUND if there are no more + * subnodes + */ +int fdt_next_subnode(const void *fdt, int offset); + +/** + * fdt_for_each_subnode - iterate over all subnodes of a parent + * + * @node: child node (int, lvalue) + * @fdt: FDT blob (const void *) + * @parent: parent node (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_subnode(node, fdt, parent) { + * Use node + * ... + * } + * + * if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and @node is used as + * iterator in the loop. The parent variable can be constant or even a + * literal. + * + */ +#define fdt_for_each_subnode(node, fdt, parent) \ + for (node = fdt_first_subnode(fdt, parent); \ + node >= 0; \ + node = fdt_next_subnode(fdt, node)) + +/**********************************************************************/ +/* General functions */ +/**********************************************************************/ +#define fdt_get_header(fdt, field) \ + (fdt32_ld(&((const struct fdt_header *)(fdt))->field)) +#define fdt_magic(fdt) (fdt_get_header(fdt, magic)) +#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) +#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) +#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings)) +#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap)) +#define fdt_version(fdt) (fdt_get_header(fdt, version)) +#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version)) +#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys)) +#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings)) +#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct)) + +#define fdt_set_hdr_(name) \ + static inline void fdt_set_##name(void *fdt, uint32_t val) \ + { \ + struct
fdt_header *fdth = (struct fdt_header *)fdt; \ + fdth->name = cpu_to_fdt32(val); \ + } +fdt_set_hdr_(magic); +fdt_set_hdr_(totalsize); +fdt_set_hdr_(off_dt_struct); +fdt_set_hdr_(off_dt_strings); +fdt_set_hdr_(off_mem_rsvmap); +fdt_set_hdr_(version); +fdt_set_hdr_(last_comp_version); +fdt_set_hdr_(boot_cpuid_phys); +fdt_set_hdr_(size_dt_strings); +fdt_set_hdr_(size_dt_struct); +#undef fdt_set_hdr_ + +/** + * fdt_header_size - return the size of the tree's header + * @fdt: pointer to a flattened device tree + */ +size_t fdt_header_size_(uint32_t version); +static inline size_t fdt_header_size(const void *fdt) +{ + return fdt_header_size_(fdt_version(fdt)); +} + +/** + * fdt_check_header - sanity check a device tree header + + * @fdt: pointer to data which might be a flattened device tree + * + * fdt_check_header() checks that the given buffer contains what + * appears to be a flattened device tree, and that the header contains + * valid information (to the extent that can be determined from the + * header alone). + * + * returns: + * 0, if the buffer appears to contain a valid device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings, as above + */ +int fdt_check_header(const void *fdt); + +/** + * fdt_move - move a device tree around in memory + * @fdt: pointer to the device tree to move + * @buf: pointer to memory where the device is to be moved + * @bufsize: size of the memory space at buf + * + * fdt_move() relocates, if possible, the device tree blob located at + * fdt to the buffer at buf of size bufsize. The buffer may overlap + * with the existing device tree blob at fdt. Therefore, + * fdt_move(fdt, fdt, fdt_totalsize(fdt)) + * should always succeed. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_move(const void *fdt, void *buf, int bufsize); + +/**********************************************************************/ +/* Read-only functions */ +/**********************************************************************/ + +int fdt_check_full(const void *fdt, size_t bufsize); + +/** + * fdt_get_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * @lenp: optional pointer to return the string's length + * + * fdt_get_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt, and optionally also + * returns the string's length in *lenp. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_get_string(const void *fdt, int stroffset, int *lenp); + +/** + * fdt_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * + * fdt_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_string(const void *fdt, int stroffset); + +/** + * fdt_find_max_phandle - find and return the highest phandle in a tree + * @fdt: pointer to the device tree blob + * @phandle: return location for the highest phandle value found in the tree + * + * fdt_find_max_phandle() finds the highest phandle value in the given device + * tree. 
The value returned in @phandle is only valid if the function returns + * success. + * + * returns: + * 0 on success or a negative error code on failure + */ +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_get_max_phandle - retrieves the highest phandle in a tree + * @fdt: pointer to the device tree blob + * + * fdt_get_max_phandle retrieves the highest phandle in the given + * device tree. This will ignore badly formatted phandles, or phandles + * with a value of 0 or -1. + * + * This function is deprecated in favour of fdt_find_max_phandle(). + * + * returns: + * the highest phandle on success + * 0, if no phandle was found in the device tree + * -1, if an error occurred + */ +static inline uint32_t fdt_get_max_phandle(const void *fdt) +{ + uint32_t phandle; + int err; + + err = fdt_find_max_phandle(fdt, &phandle); + if (err < 0) + return (uint32_t)-1; + + return phandle; +} + +/** + * fdt_generate_phandle - return a new, unused phandle for a device tree blob + * @fdt: pointer to the device tree blob + * @phandle: return location for the new phandle + * + * Walks the device tree blob and looks for the highest phandle value. On + * success, the new, unused phandle value (one higher than the previously + * highest phandle value in the device tree blob) will be returned in the + * @phandle parameter. + * + * Returns: + * 0 on success or a negative error-code on failure + */ +int fdt_generate_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_num_mem_rsv - retrieve the number of memory reserve map entries + * @fdt: pointer to the device tree blob + * + * Returns the number of entries in the device tree blob's memory + * reservation map. This does not include the terminating 0,0 entry + * or any other (0,0) entries reserved for expansion. 
+ * + * returns: + * the number of entries + */ +int fdt_num_mem_rsv(const void *fdt); + +/** + * fdt_get_mem_rsv - retrieve one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: pointers to 64-bit variables + * + * On success, *address and *size will contain the address and size of + * the n-th reserve map entry from the device tree blob, in + * native-endian format. + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); + +/** + * fdt_subnode_offset_namelen - find a subnode based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_subnode_offset(), but only examine the first + * namelen characters of name for matching the subnode name. This is + * useful for finding subnodes based on a portion of a larger string, + * such as a full path. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, + const char *name, int namelen); +#endif +/** + * fdt_subnode_offset - find a subnode of a given node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_subnode_offset() finds a subnode of the node at structure block + * offset parentoffset with the given name. name may include a unit + * address, in which case fdt_subnode_offset() will find the subnode + * with that unit address, or the unit address may be omitted, in + * which case fdt_subnode_offset() will find an arbitrary subnode + * whose name excluding unit address matches the given name. 
+ * + * returns: + * structure block offset of the requested subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); + +/** + * fdt_path_offset_namelen - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * @namelen: number of characters of path to consider + * + * Identical to fdt_path_offset(), but only consider the first namelen + * characters of path as the path name. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen); +#endif + +/** + * fdt_path_offset - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * + * fdt_path_offset() finds a node of a given path in the device tree. + * Each path component may omit the unit address portion, but the + * results of this are undefined if any such path component is + * ambiguous (that is if there are multiple nodes at the relevant + * level matching the given component, differentiated only by unit + * address). + * + * returns: + * structure block offset of the node with the requested path (>=0), on + * success + * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid + * -FDT_ERR_NOTFOUND, if the requested node does not exist + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. 
+ */ +int fdt_path_offset(const void *fdt, const char *path); + +/** + * fdt_get_name - retrieve the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the starting node + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_name() retrieves the name (including unit address) of the + * device tree node at structure block offset nodeoffset. If lenp is + * non-NULL, the length of this name is also returned, in the integer + * pointed to by lenp. + * + * returns: + * pointer to the node's name, on success + * If lenp is non-NULL, *lenp contains the length of that name + * (>=0) + * NULL, on error + * if lenp is non-NULL *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp); + +/** + * fdt_first_property_offset - find the offset of a node's first property + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * + * fdt_first_property_offset() finds the first property of the node at + * the given structure block offset. + * + * returns: + * structure block offset of the property (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested node has no properties + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_first_property_offset(const void *fdt, int nodeoffset); + +/** + * fdt_next_property_offset - step through a node's properties + * @fdt: pointer to the device tree blob + * @offset: structure block offset of a property + * + * fdt_next_property_offset() finds the property immediately after the + * one at the given structure block offset. 
This will be a property + * of the same node as the given property. + * + * returns: + * structure block offset of the next property (>=0), on success + * -FDT_ERR_NOTFOUND, if the given property is the last in its node + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_next_property_offset(const void *fdt, int offset); + +/** + * fdt_for_each_property_offset - iterate over all properties of a node + * + * @property_offset: property offset (int, lvalue) + * @fdt: FDT blob (const void *) + * @node: node offset (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_property_offset(property, fdt, node) { + * Use property + * ... + * } + * + * if ((property < 0) && (property != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and property is used as + * iterator in the loop. The node variable can be constant or even a + * literal. + */ +#define fdt_for_each_property_offset(property, fdt, node) \ + for (property = fdt_first_property_offset(fdt, node); \ + property >= 0; \ + property = fdt_next_property_offset(fdt, property)) + +/** + * fdt_get_property_by_offset - retrieve the property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to retrieve + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property_by_offset() retrieves a pointer to the + * fdt_property structure within the device tree blob at the given + * offset. If lenp is non-NULL, the length of the property value is + * also returned, in the integer pointed to by lenp. + * + * Note that this code only works on device tree versions >= 16. fdt_getprop() + * works on all versions. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp); + +/** + * fdt_get_property_namelen - find a property based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_get_property(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int nodeoffset, + const char *name, + int namelen, int *lenp); +#endif + +/** + * fdt_get_property - find a given property in a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property() retrieves a pointer to the fdt_property + * structure within the device tree blob corresponding to the property + * named 'name' of the node at offset nodeoffset. If lenp is + * non-NULL, the length of the property value is also returned, in the + * integer pointed to by lenp. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, + const char *name, + int *lenp) +{ + return (struct fdt_property *)(uintptr_t) + fdt_get_property(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_getprop_by_offset - retrieve the value of a property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to read + * @namep: pointer to a string variable (will be overwritten) or NULL + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop_by_offset() retrieves a pointer to the value of the + * property at structure block offset 'offset' (this will be a pointer + * to within the device blob itself, not a copy of the value). If + * lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. If namep is non-NULL, + * the property's name will also be returned in the char * pointed to + * by namep (this will be a pointer to within the device tree's string + * block, not a new copy of the name). + * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * if namep is non-NULL *namep contains a pointer to the property + * name.
+ * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp); +#endif + +/** + * fdt_getprop_namelen - get property value based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_getprop(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp); +static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset, + const char *name, int namelen, + int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name, + namelen, lenp); +} +#endif + +/** + * fdt_getprop - retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop() retrieves a pointer to the value of the property + * named 'name' of the node at offset nodeoffset (this will be a + * pointer to within the device blob itself, not a copy of the value). + * If lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. 
+ * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline void *fdt_getprop_w(void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_get_phandle - retrieve the phandle of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the node + * + * fdt_get_phandle() retrieves the phandle of the device tree node at + * structure block offset nodeoffset. + * + * returns: + * the phandle of the node at nodeoffset, on success (!= 0, != -1) + * 0, if the node has no phandle, or another error occurs + */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); + +/** + * fdt_get_alias_namelen - get alias based on substring + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * @namelen: number of characters of name to consider + * + * Identical to fdt_get_alias(), but only examine the first namelen + * characters of name for matching the alias name. + */ +#ifndef SWIG /* Not available in Python */ +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen); +#endif + +/** + * fdt_get_alias - retrieve the path referenced by a given alias + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * + * fdt_get_alias() retrieves the value of a given alias. That is, the + * value of the property named 'name' in the node /aliases.
+ * + * returns: + * a pointer to the expansion of the alias named 'name', if it exists + * NULL, if the given alias or the /aliases node does not exist + */ +const char *fdt_get_alias(const void *fdt, const char *name); + +/** + * fdt_get_path - determine the full path of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose path to find + * @buf: character buffer to contain the returned path (will be overwritten) + * @buflen: size of the character buffer at buf + * + * fdt_get_path() computes the full path of the node at offset + * nodeoffset, and records that path in the buffer at buf. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * 0, on success + * buf contains the absolute path of the node at + * nodeoffset, as a NUL-terminated string. + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOSPACE, the path of the given node is longer than (bufsize-1) + * characters and will not fit in the given buffer. + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen); + +/** + * fdt_supernode_atdepth_offset - find a specific ancestor of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * @supernodedepth: depth of the ancestor to find + * @nodedepth: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_supernode_atdepth_offset() finds an ancestor of the given node + * at a specific depth from the root (where the root itself has depth + * 0, its immediate subnodes depth 1 and so forth). So + * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL); + * will always return 0, the offset of the root node. 
If the node at + * nodeoffset has depth D, then: + * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL); + * will return nodeoffset itself. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * structure block offset of the node at node offset's ancestor + * of depth supernodedepth (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of + * nodeoffset + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth); + +/** + * fdt_node_depth - find the depth of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose depth to find + * + * fdt_node_depth() finds the depth of a given node. The root node + * has depth 0, its immediate subnodes depth 1 and so forth. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * depth of the node at nodeoffset (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_depth(const void *fdt, int nodeoffset); + +/** + * fdt_parent_offset - find the parent of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_parent_offset() locates the parent node of a given node (that + * is, it finds the offset of the node which contains the node at + * nodeoffset as a subnode). + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset, *twice*.
+ * + * returns: + * structure block offset of the parent of the node at nodeoffset + * (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_parent_offset(const void *fdt, int nodeoffset); + +/** + * fdt_node_offset_by_prop_value - find nodes with a given property value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @propname: property name to check + * @propval: property value to search for + * @proplen: length of the value in propval + * + * fdt_node_offset_by_prop_value() returns the offset of the first + * node after startoffset, which has a property named propname whose + * value is of length proplen and has value equal to propval; or if + * startoffset is -1, the very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_prop_value(fdt, -1, propname, + * propval, proplen); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_prop_value(fdt, offset, propname, + * propval, proplen); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen); + +/** + * fdt_node_offset_by_phandle - find the node with a given phandle + * @fdt: pointer to the device tree blob + * @phandle: phandle value + * + * fdt_node_offset_by_phandle() returns the offset of the node + * which has the given phandle value. If there is more than one node + * in the tree with the given phandle (an invalid tree), results are + * undefined. + * + * returns: + * structure block offset of the located node (>= 0), on success + * -FDT_ERR_NOTFOUND, no node with that phandle exists + * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle); + +/** + * fdt_node_check_compatible: check a node's compatible property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @compatible: string to match against + * + * + * fdt_node_check_compatible() returns 0 if the given node contains a + * 'compatible' property with the given string as one of its elements, + * it returns non-zero otherwise, or on error. 
+ * + * returns: + * 0, if the node has a 'compatible' property listing the given string + * 1, if the node has a 'compatible' property, but it does not list + * the given string + * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property + * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible); + +/** + * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @compatible: 'compatible' string to match against + * + * fdt_node_offset_by_compatible() returns the offset of the first + * node after startoffset, which has a 'compatible' property which + * lists the given compatible string; or if startoffset is -1, the + * very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_compatible(fdt, -1, compatible); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_compatible(fdt, offset, compatible); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible); + +/** + * fdt_stringlist_contains - check a string list property for a string + * @strlist: Property containing a list of strings to check + * @listlen: Length of property + * @str: String to search for + * + * This is a utility function provided for convenience. The list contains + * one or more strings, each terminated by \0, as is found in a device tree + * "compatible" property. + * + * @return: 1 if the string is found in the list, 0 not found, or invalid list + */ +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str); + +/** + * fdt_stringlist_count - count the number of strings in a string list + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @return: + * the number of strings in the given property + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property); + +/** + * fdt_stringlist_search - find a string in a string list and return its index + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @string: string to look up in the string list + * + * Note that it is possible for this function to succeed on property values + * that are not NUL-terminated. 
That's because the function will stop after + * finding the first occurrence of @string. This can for example happen with + * small-valued cell properties, such as #address-cells, when searching for + * the empty string. + * + * @return: + * the index of the string in the list of strings + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist or does not contain + * the given string + */ +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string); + +/** + * fdt_stringlist_get() - obtain the string at a given index in a string list + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @index: index of the string to return + * @lenp: return location for the string length or an error code on failure + * + * Note that this will successfully extract strings from properties with + * non-NUL-terminated values. For example on small-valued cell properties + * this function will return the empty string. + * + * If non-NULL, the length of the string (on success) or a negative error-code + * (on failure) will be stored in the integer pointed to by lenp. + * + * @return: + * A pointer to the string at the given index in the string list or NULL on + * failure. On success the length of the string will be stored in the memory + * location pointed to by the lenp parameter, if non-NULL.
On failure one of + * the following negative error codes will be returned in the lenp parameter + * (if non-NULL): + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int index, + int *lenp); + +/**********************************************************************/ +/* Read-only functions (addressing related) */ +/**********************************************************************/ + +/** + * FDT_MAX_NCELLS - maximum value for #address-cells and #size-cells + * + * This is the maximum value for #address-cells, #size-cells and + * similar properties that will be processed by libfdt. IEEE1275 + * requires that OF implementations handle values up to 4. + * Implementations may support larger values, but in practice higher + * values aren't used. + */ +#define FDT_MAX_NCELLS 4 + +/** + * fdt_address_cells - retrieve address size for a bus represented in the tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address size for + * + * When the node has a valid #address-cells property, returns its value. + * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 2, if the node has no #address-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_address_cells(const void *fdt, int nodeoffset); + +/** + * fdt_size_cells - retrieve address range size for a bus represented in the + * tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address range size for + * + * When the node has a valid #size-cells property, returns its value.
+ * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 1, if the node has no #size-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #size-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_size_cells(const void *fdt, int nodeoffset); + + +/**********************************************************************/ +/* Write-in-place functions */ +/**********************************************************************/ + +/** + * fdt_setprop_inplace_namelen_partial - change a property's value, + * but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @namelen: number of characters of name to consider + * @idx: index of the property to change in the array + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * Identical to fdt_setprop_inplace(), but modifies the given property + * starting from the given index, and using only the first characters + * of the name. It is useful when you want to manipulate only one value of + * an array and you have a string that doesn't end with \0. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len); +#endif + +/** + * fdt_setprop_inplace - change a property's value, but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * fdt_setprop_inplace() replaces the value of a given property with + * the data in val, of length len. 
This function cannot change the + * size of a property, and so will only work if len is equal to the + * current length of the property. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if len is not equal to the property's current length + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len); +#endif + +/** + * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to replace the property with + * + * fdt_setprop_inplace_u32() replaces the value of a given property + * with the 32-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 4. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 4 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_u64 - change the value of a 64-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to replace the property with + * + * fdt_setprop_inplace_u64() replaces the value of a given property + * with the 64-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 8. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 8 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_cell - change the value of a single-cell property + * + * This is an alternative name for fdt_setprop_inplace_u32() + */ +static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_setprop_inplace_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_nop_property - replace a property with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_nop_property() will replace a given property's representation + * in the blob with FDT_NOP tags, effectively removing it from the + * tree. + * + * This function will alter only the bytes in the blob which contain + * the property, and will not alter or move any other part of the + * tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_property(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_nop_node - replace a node (subtree) with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_nop_node() will replace a given node's representation in the + * blob, including all its subnodes, if any, with FDT_NOP tags, + * effectively removing it from the tree. + * + * This function will alter only the bytes in the blob which contain + * the node and its properties and subnodes, and will not alter or + * move any other part of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Sequential write functions */ +/**********************************************************************/ + +/* fdt_create_with_flags flags */ +#define FDT_CREATE_FLAG_NO_NAME_DEDUP 0x1 + /* FDT_CREATE_FLAG_NO_NAME_DEDUP: Do not try to de-duplicate property + * names in the fdt. This can result in faster creation times, but + * a larger fdt. */ + +#define FDT_CREATE_FLAGS_ALL (FDT_CREATE_FLAG_NO_NAME_DEDUP) + +/** + * fdt_create_with_flags - begin creation of a new fdt + * @buf: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at buf + * @flags: a valid combination of FDT_CREATE_FLAG_ flags, or 0.
+ * + * fdt_create_with_flags() begins the process of creating a new fdt with + * the sequential write interface. + * + * fdt creation process must end with fdt_finish() to produce a valid fdt. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + * -FDT_ERR_BADFLAGS, flags is not valid + */ +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags); + +/** + * fdt_create - begin creation of a new fdt + * @buf: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at buf + * + * fdt_create() is equivalent to fdt_create_with_flags() with flags=0. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + */ +int fdt_create(void *buf, int bufsize); + +int fdt_resize(void *fdt, void *buf, int bufsize); +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size); +int fdt_finish_reservemap(void *fdt); +int fdt_begin_node(void *fdt, const char *name); +int fdt_property(void *fdt, const char *name, const void *val, int len); +static inline int fdt_property_u32(void *fdt, const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} +static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} + +#ifndef SWIG /* Not available in Python */ +static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val) +{ + return fdt_property_u32(fdt, name, val); +} +#endif + +/** + * fdt_property_placeholder - add a new property and return a ptr to its value + * + * @fdt: pointer to the device tree blob + * @name: name of property to add + * @len: length of property value in bytes + * @valp: returns a pointer to where the value should be placed + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_NOSPACE, standard meanings + */
+int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp); + +#define fdt_property_string(fdt, name, str) \ + fdt_property(fdt, name, str, strlen(str)+1) +int fdt_end_node(void *fdt); +int fdt_finish(void *fdt); + +/**********************************************************************/ +/* Read-write functions */ +/**********************************************************************/ + +int fdt_create_empty_tree(void *buf, int bufsize); +int fdt_open_into(const void *fdt, void *buf, int bufsize); +int fdt_pack(void *fdt); + +/** + * fdt_add_mem_rsv - add one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: 64-bit values (native endian) + * + * Adds a reserve map entry to the given blob reserving a region at + * address address of length size. + * + * This function will insert data into the reserve map and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new reservation entry + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size); + +/** + * fdt_del_mem_rsv - remove a memory reserve map entry + * @fdt: pointer to the device tree blob + * @n: entry to remove + * + * fdt_del_mem_rsv() removes the n-th memory reserve map entry from + * the blob. + * + * This function will delete data from the reservation table and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, there is no entry of the given index (i.e. 
there + * are less than n+1 reserve map entries) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_mem_rsv(void *fdt, int n); + +/** + * fdt_set_name - change the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * @name: name to give the node + * + * fdt_set_name() replaces the name (including unit address, if any) + * of the given node with the given string. NOTE: this function can't + * efficiently check if the new name is unique amongst the given + * node's siblings; results are undefined if this function is invoked + * with a name equal to one of the given node's siblings. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob + * to contain the new name + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_set_name(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_setprop - create or change a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to set the property value to + * @len: length of the property value + * + * fdt_setprop() sets the value of the named property in the given + * node to the given value and length, creating the property if it + * does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_placeholder - allocate space for a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @len: length of the property value + * @prop_data: return pointer to property data + * + * fdt_setprop_placeholder() allocates the named property in the given node. + * If the property exists it is resized. In either case a pointer to the + * property data is returned. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data); + +/** + * fdt_setprop_u32 - set a property to a 32-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * fdt_setprop_u32() sets the value of the named property in the given + * node to the given 32-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u32(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_u64 - set a property to a 64-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value for the property (native endian) + * + * fdt_setprop_u64() sets the value of the named property in the given + * node to the given 64-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u64(void *fdt, int nodeoffset, const char *name, + uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_cell - set a property to a single cell value + * + * This is an alternative name for fdt_setprop_u32() + */ +static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + return fdt_setprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_setprop_string - set a property to a string value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value for the property + * + * fdt_setprop_string() sets the value of the named property in the + * given node to the given string value (using the length of the + * string to determine the new length of the property), or creates a + * new property with that value if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_string(fdt, nodeoffset, name, str) \ + fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + + +/** + * fdt_setprop_empty - set a property to an empty value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * + * fdt_setprop_empty() sets the value of the named property in the + * given node to an empty (zero length) value, or creates a new empty + * property if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_empty(fdt, nodeoffset, name) \ + fdt_setprop((fdt), (nodeoffset), (name), NULL, 0) + +/** + * fdt_appendprop - append to or create a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to append to + * @val: pointer to data to append to the property value + * @len: length of the data to append to the property value + * + * fdt_appendprop() appends the value to the named property in the + * given node, creating the property if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_appendprop_u32 - append a 32-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u32() appends the given 32-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_u64 - append a 64-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u64() appends the given 64-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_cell - append a single cell value to a property + * + * This is an alternative name for fdt_appendprop_u32() + */ +static inline int fdt_appendprop_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_appendprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_appendprop_string - append a string to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value to append to the property + * + * fdt_appendprop_string() appends the given string to the value of + * the named property in the given node, or creates a new property + * with that value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_appendprop_string(fdt, nodeoffset, name, str) \ + fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_appendprop_addrrange - append an address range property + * @fdt: pointer to the device tree blob + * @parent: offset of the parent node + * @nodeoffset: offset of the node to add a property at + * @name: name of property + * @addr: start address of a given range + * @size: size of a given range + * + * fdt_appendprop_addrrange() appends an address range value (start + * address and size) to the value of the named property in the given + * node, or creates a new property with that value if it does not + * already exist. + * If "name" is not specified, a default "reg" is used. + * Cell sizes are determined by parent's #address-cells and #size-cells. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADVALUE, addr or size doesn't fit to respective cells size + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain a new property + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size); + +/** + * fdt_delprop - delete a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to delete + * @name: name of the property to delete + * + * fdt_delprop() will delete the given property. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_delprop(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_add_subnode_namelen - creates a new node based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to create + * @namelen: number of characters of name to consider + * + * Identical to fdt_add_subnode(), but use only the first namelen + * characters of name as the name of the new node. This is useful for + * creating subnodes based on a portion of a larger string, such as a + * full path.
+ */ +#ifndef SWIG /* Not available in Python */ +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen); +#endif + +/** + * fdt_add_subnode - creates a new node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to create + * + * fdt_add_subnode() creates a new node as a subnode of the node at + * structure block offset parentoffset, with the given name (which + * should include the unit address, if any). + * + * This function will insert data into the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * structure block offset of the created subnode (>=0), on + * success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of + * the given name + * -FDT_ERR_NOSPACE, if there is insufficient free space in the + * blob to contain the new node + * -FDT_ERR_NOSPACE + * -FDT_ERR_BADLAYOUT + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_add_subnode(void *fdt, int parentoffset, const char *name); + +/** + * fdt_del_node - delete a node (subtree) + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to delete + * + * fdt_del_node() will remove the given node, including all its + * subnodes if any, from the blob. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_node(void *fdt, int nodeoffset); + +/** + * fdt_overlay_apply - Applies a DT overlay on a base DT + * @fdt: pointer to the base device tree blob + * @fdto: pointer to the device tree overlay blob + * + * fdt_overlay_apply() will apply the given device tree overlay on the + * given base device tree. + * + * Expect the base device tree to be modified, even if the function + * returns an error. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there's not enough space in the base device tree + * -FDT_ERR_NOTFOUND, the overlay points to some nonexistent nodes or + * properties in the base DT + * -FDT_ERR_BADPHANDLE, + * -FDT_ERR_BADOVERLAY, + * -FDT_ERR_NOPHANDLES, + * -FDT_ERR_INTERNAL, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADOFFSET, + * -FDT_ERR_BADPATH, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_overlay_apply(void *fdt, void *fdto); + +/**********************************************************************/ +/* Debugging / informational functions */ +/**********************************************************************/ + +const char *fdt_strerror(int errval); +#ifdef __cplusplus +} +#endif + +#endif /* LIBFDT_H */ diff --git a/fdt/libfdt_env.h b/fdt/libfdt_env.h new file mode 100644 index 0000000000..2363810c17 --- /dev/null +++ b/fdt/libfdt_env.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_ENV_H +#define LIBFDT_ENV_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor.
+ */ + +#include +#include +#include +#include + +#define INT_MAX INT32_MAX +#define UINT_MAX UINT32_MAX + +#ifdef __CHECKER__ +#define FDT_FORCE __attribute__((force)) +#define FDT_BITWISE __attribute__((bitwise)) +#else +#define FDT_FORCE +#define FDT_BITWISE +#endif + +typedef uint16_t FDT_BITWISE fdt16_t; +typedef uint32_t FDT_BITWISE fdt32_t; +typedef uint64_t FDT_BITWISE fdt64_t; + +#define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n]) +#define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1)) +#define CPU_TO_FDT32(x) ((EXTRACT_BYTE(x, 0) << 24) | (EXTRACT_BYTE(x, 1) << 16) | \ + (EXTRACT_BYTE(x, 2) << 8) | EXTRACT_BYTE(x, 3)) +#define CPU_TO_FDT64(x) ((EXTRACT_BYTE(x, 0) << 56) | (EXTRACT_BYTE(x, 1) << 48) | \ + (EXTRACT_BYTE(x, 2) << 40) | (EXTRACT_BYTE(x, 3) << 32) | \ + (EXTRACT_BYTE(x, 4) << 24) | (EXTRACT_BYTE(x, 5) << 16) | \ + (EXTRACT_BYTE(x, 6) << 8) | EXTRACT_BYTE(x, 7)) + +static inline uint16_t fdt16_to_cpu(fdt16_t x) +{ + return (FDT_FORCE uint16_t)CPU_TO_FDT16(x); +} +static inline fdt16_t cpu_to_fdt16(uint16_t x) +{ + return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x); +} + +static inline uint32_t fdt32_to_cpu(fdt32_t x) +{ + return (FDT_FORCE uint32_t)CPU_TO_FDT32(x); +} +static inline fdt32_t cpu_to_fdt32(uint32_t x) +{ + return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x); +} + +static inline uint64_t fdt64_to_cpu(fdt64_t x) +{ + return (FDT_FORCE uint64_t)CPU_TO_FDT64(x); +} +static inline fdt64_t cpu_to_fdt64(uint64_t x) +{ + return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x); +} +#undef CPU_TO_FDT64 +#undef CPU_TO_FDT32 +#undef CPU_TO_FDT16 +#undef EXTRACT_BYTE + +#ifdef __APPLE__ +#include + +/* strnlen() is not available on Mac OS < 10.7 */ +# if !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < \ + MAC_OS_X_VERSION_10_7) + +#define strnlen fdt_strnlen + +/* + * fdt_strnlen: returns the length of a string or max_count - which ever is + * smallest. 
+ * Input 1 string: the string whose size is to be determined + * Input 2 max_count: the maximum value returned by this function + * Output: length of the string or max_count (the smallest of the two) + */ +static inline size_t fdt_strnlen(const char *string, size_t max_count) +{ + const char *p = memchr(string, 0, max_count); + return p ? p - string : max_count; +} + +#endif /* !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < + MAC_OS_X_VERSION_10_7) */ + +#endif /* __APPLE__ */ + +#endif /* LIBFDT_ENV_H */ diff --git a/fdt/libfdt_internal.h b/fdt/libfdt_internal.h new file mode 100644 index 0000000000..741eeb3150 --- /dev/null +++ b/fdt/libfdt_internal.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_INTERNAL_H +#define LIBFDT_INTERNAL_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +int fdt_ro_probe_(const void *fdt); +#define FDT_RO_PROBE(fdt) \ + { \ + int totalsize_; \ + if ((totalsize_ = fdt_ro_probe_(fdt)) < 0) \ + return totalsize_; \ + } + +int fdt_check_node_offset_(const void *fdt, int offset); +int fdt_check_prop_offset_(const void *fdt, int offset); +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s); +int fdt_node_end_offset_(void *fdt, int nodeoffset); + +static inline const void *fdt_offset_ptr_(const void *fdt, int offset) +{ + return (const char *)fdt + fdt_off_dt_struct(fdt) + offset; +} + +static inline void *fdt_offset_ptr_w_(void *fdt, int offset) +{ + return (void *)(uintptr_t)fdt_offset_ptr_(fdt, offset); +} + +static inline const struct fdt_reserve_entry *fdt_mem_rsv_(const void *fdt, int n) +{ + const struct fdt_reserve_entry *rsv_table = + (const struct fdt_reserve_entry *) + ((const char *)fdt + fdt_off_mem_rsvmap(fdt)); + + return rsv_table + n; +} +static 
inline struct fdt_reserve_entry *fdt_mem_rsv_w_(void *fdt, int n) +{ + return (void *)(uintptr_t)fdt_mem_rsv_(fdt, n); +} + +#define FDT_SW_MAGIC (~FDT_MAGIC) + +#endif /* LIBFDT_INTERNAL_H */ diff --git a/fesvr/context.cc b/fesvr/context.cc new file mode 100644 index 0000000000..ca73813768 --- /dev/null +++ b/fesvr/context.cc @@ -0,0 +1,115 @@ +#include "context.h" +#include +#include +#include + +static __thread context_t* cur; + +context_t::context_t() + : creator(NULL), func(NULL), arg(NULL), +#ifndef USE_UCONTEXT + mutex(PTHREAD_MUTEX_INITIALIZER), + cond(PTHREAD_COND_INITIALIZER), flag(0) +#else + context(new ucontext_t) +#endif +{ +} + +#ifdef USE_UCONTEXT +#ifndef GLIBC_64BIT_PTR_BUG +void context_t::wrapper(context_t* ctx) +{ +#else +void context_t::wrapper(unsigned int hi, unsigned int lo) +{ + context_t* ctx = reinterpret_cast(static_cast(lo) | (static_cast(hi) << 32)); +#endif + ctx->creator->switch_to(); + ctx->func(ctx->arg); +} +#else +void* context_t::wrapper(void* a) +{ + context_t* ctx = static_cast(a); + cur = ctx; + ctx->creator->switch_to(); + + ctx->func(ctx->arg); + return NULL; +} +#endif + +void context_t::init(void (*f)(void*), void* a) +{ + func = f; + arg = a; + creator = current(); + +#ifdef USE_UCONTEXT + getcontext(context.get()); + context->uc_link = creator->context.get(); + context->uc_stack.ss_size = 64*1024; + context->uc_stack.ss_sp = new void*[context->uc_stack.ss_size/sizeof(void*)]; +#ifndef GLIBC_64BIT_PTR_BUG + makecontext(context.get(), (void(*)(void))&context_t::wrapper, 1, this); +#else + unsigned int hi(reinterpret_cast(this) >> 32); + unsigned int lo(reinterpret_cast(this)); + makecontext(context.get(), (void(*)(void))&context_t::wrapper, 2, hi, lo); +#endif + switch_to(); +#else + assert(flag == 0); + + pthread_mutex_lock(&creator->mutex); + creator->flag = 0; + if (pthread_create(&thread, NULL, &context_t::wrapper, this) != 0) + abort(); + pthread_detach(thread); + while (!creator->flag) + 
pthread_cond_wait(&creator->cond, &creator->mutex); + pthread_mutex_unlock(&creator->mutex); +#endif +} + +context_t::~context_t() +{ + assert(this != cur); +} + +void context_t::switch_to() +{ + assert(this != cur); +#ifdef USE_UCONTEXT + context_t* prev = cur; + cur = this; + if (swapcontext(prev->context.get(), context.get()) != 0) + abort(); +#else + cur->flag = 0; + this->flag = 1; + pthread_mutex_lock(&this->mutex); + pthread_cond_signal(&this->cond); + pthread_mutex_unlock(&this->mutex); + pthread_mutex_lock(&cur->mutex); + while (!cur->flag) + pthread_cond_wait(&cur->cond, &cur->mutex); + pthread_mutex_unlock(&cur->mutex); +#endif +} + +context_t* context_t::current() +{ + if (cur == NULL) + { + cur = new context_t; +#ifdef USE_UCONTEXT + getcontext(cur->context.get()); +#else + cur->thread = pthread_self(); + cur->flag = 1; +#endif + } + return cur; +} diff --git a/fesvr/context.h b/fesvr/context.h new file mode 100644 index 0000000000..18bf50ef8d --- /dev/null +++ b/fesvr/context.h @@ -0,0 +1,54 @@ +#ifndef _HTIF_CONTEXT_H +#define _HTIF_CONTEXT_H + +// A replacement for ucontext.h, which is sadly deprecated. 
+ +#include + +#if defined(__GLIBC__) +# undef USE_UCONTEXT +# define USE_UCONTEXT +# include +# include +#include + +#if (ULONG_MAX > UINT_MAX) // 64-bit systems only +#if (100*GLIB_MAJOR_VERSION+GLIB_MINOR_VERSION < 208) +#define GLIBC_64BIT_PTR_BUG +static_assert (sizeof(unsigned int) == 4, "uint size doesn't match expected 32bit"); +static_assert (sizeof(unsigned long) == 8, "ulong size doesn't match expected 64bit"); +static_assert (sizeof(void*) == 8, "ptr size doesn't match expected 64bit"); +#endif +#endif /* ULONG_MAX > UINT_MAX */ + +#endif + +class context_t +{ + public: + context_t(); + ~context_t(); + void init(void (*func)(void*), void* arg); + void switch_to(); + static context_t* current(); + private: + context_t* creator; + void (*func)(void*); + void* arg; +#ifdef USE_UCONTEXT + std::unique_ptr context; +#ifndef GLIBC_64BIT_PTR_BUG + static void wrapper(context_t*); +#else + static void wrapper(unsigned int, unsigned int); +#endif +#else + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + volatile int flag; + static void* wrapper(void*); +#endif +}; + +#endif diff --git a/fesvr/debug_defines.h b/fesvr/debug_defines.h new file mode 100644 index 0000000000..e5f9291058 --- /dev/null +++ b/fesvr/debug_defines.h @@ -0,0 +1,1418 @@ +#define DTM_IDCODE 0x01 +/* +* Identifies the release version of this part. + */ +#define DTM_IDCODE_VERSION_OFFSET 28 +#define DTM_IDCODE_VERSION_LENGTH 4 +#define DTM_IDCODE_VERSION (0xf << DTM_IDCODE_VERSION_OFFSET) +/* +* Identifies the designer's part number of this part. + */ +#define DTM_IDCODE_PARTNUMBER_OFFSET 12 +#define DTM_IDCODE_PARTNUMBER_LENGTH 16 +#define DTM_IDCODE_PARTNUMBER (0xffff << DTM_IDCODE_PARTNUMBER_OFFSET) +/* +* Identifies the designer/manufacturer of this part. Bits 6:0 must be +* bits 6:0 of the designer/manufacturer's Identification Code as +* assigned by JEDEC Standard JEP106. 
Bits 10:7 contain the modulo-16 +* count of the number of continuation characters (0x7f) in that same +* Identification Code. + */ +#define DTM_IDCODE_MANUFID_OFFSET 1 +#define DTM_IDCODE_MANUFID_LENGTH 11 +#define DTM_IDCODE_MANUFID (0x7ff << DTM_IDCODE_MANUFID_OFFSET) +#define DTM_IDCODE_1_OFFSET 0 +#define DTM_IDCODE_1_LENGTH 1 +#define DTM_IDCODE_1 (0x1 << DTM_IDCODE_1_OFFSET) +#define DTM_DTMCS 0x10 +/* +* Writing 1 to this bit does a hard reset of the DTM, +* causing the DTM to forget about any outstanding DMI transactions. +* In general this should only be used when the Debugger has +* reason to expect that the outstanding DMI transaction will never +* complete (e.g. a reset condition caused an inflight DMI transaction to +* be cancelled). + */ +#define DTM_DTMCS_DMIHARDRESET_OFFSET 17 +#define DTM_DTMCS_DMIHARDRESET_LENGTH 1 +#define DTM_DTMCS_DMIHARDRESET (0x1 << DTM_DTMCS_DMIHARDRESET_OFFSET) +/* +* Writing 1 to this bit clears the sticky error state +* and allows the DTM to retry or complete the previous +* transaction. + */ +#define DTM_DTMCS_DMIRESET_OFFSET 16 +#define DTM_DTMCS_DMIRESET_LENGTH 1 +#define DTM_DTMCS_DMIRESET (0x1 << DTM_DTMCS_DMIRESET_OFFSET) +/* +* This is a hint to the debugger of the minimum number of +* cycles a debugger should spend in +* Run-Test/Idle after every DMI scan to avoid a `busy' +* return code (\Fdmistat of 3). A debugger must still +* check \Fdmistat when necessary. +* +* 0: It is not necessary to enter Run-Test/Idle at all. +* +* 1: Enter Run-Test/Idle and leave it immediately. +* +* 2: Enter Run-Test/Idle and stay there for 1 cycle before leaving. +* +* And so on. + */ +#define DTM_DTMCS_IDLE_OFFSET 12 +#define DTM_DTMCS_IDLE_LENGTH 3 +#define DTM_DTMCS_IDLE (0x7 << DTM_DTMCS_IDLE_OFFSET) +/* +* 0: No error. +* +* 1: Reserved. Interpret the same as 2. +* +* 2: An operation failed (resulted in \Fop of 2). +* +* 3: An operation was attempted while a DMI access was still in +* progress (resulted in \Fop of 3). 
+ */ +#define DTM_DTMCS_DMISTAT_OFFSET 10 +#define DTM_DTMCS_DMISTAT_LENGTH 2 +#define DTM_DTMCS_DMISTAT (0x3 << DTM_DTMCS_DMISTAT_OFFSET) +/* +* The size of \Faddress in \Rdmi. + */ +#define DTM_DTMCS_ABITS_OFFSET 4 +#define DTM_DTMCS_ABITS_LENGTH 6 +#define DTM_DTMCS_ABITS (0x3f << DTM_DTMCS_ABITS_OFFSET) +/* +* 0: Version described in spec version 0.11. +* +* 1: Version described in spec version 0.13 (and later?), which +* reduces the DMI data width to 32 bits. +* +* Other values are reserved for future use. + */ +#define DTM_DTMCS_VERSION_OFFSET 0 +#define DTM_DTMCS_VERSION_LENGTH 4 +#define DTM_DTMCS_VERSION (0xf << DTM_DTMCS_VERSION_OFFSET) +#define DTM_DMI 0x11 +/* +* Address used for DMI access. In Update-DR this value is used +* to access the DM over the DMI. + */ +#define DTM_DMI_ADDRESS_OFFSET 34 +#define DTM_DMI_ADDRESS_LENGTH abits +#define DTM_DMI_ADDRESS (((1L< +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +device_t::device_t() + : command_handlers(command_t::MAX_COMMANDS), + command_names(command_t::MAX_COMMANDS) +{ + for (size_t cmd = 0; cmd < command_t::MAX_COMMANDS; cmd++) + register_command(cmd, std::bind(&device_t::handle_null_command, this, _1), ""); + register_command(command_t::MAX_COMMANDS-1, std::bind(&device_t::handle_identify, this, _1), "identity"); +} + +void device_t::register_command(size_t cmd, command_func_t handler, const char* name) +{ + assert(cmd < command_t::MAX_COMMANDS); + assert(strlen(name) < IDENTITY_SIZE); + command_handlers[cmd] = handler; + command_names[cmd] = name; +} + +void device_t::handle_command(command_t cmd) +{ + command_handlers[cmd.cmd()](cmd); +} + +void device_t::handle_null_command(command_t cmd) +{ +} + +void device_t::handle_identify(command_t cmd) +{ + size_t what = cmd.payload() % command_t::MAX_COMMANDS; + uint64_t addr = cmd.payload() / command_t::MAX_COMMANDS; + assert(addr % IDENTITY_SIZE == 0); + + char id[IDENTITY_SIZE] = {0}; + if 
(what == command_t::MAX_COMMANDS-1) + { + assert(strlen(identity()) < IDENTITY_SIZE); + strcpy(id, identity()); + } + else + strcpy(id, command_names[what].c_str()); + + cmd.memif().write(addr, IDENTITY_SIZE, id); + cmd.respond(1); +} + +bcd_t::bcd_t() +{ + register_command(0, std::bind(&bcd_t::handle_read, this, _1), "read"); + register_command(1, std::bind(&bcd_t::handle_write, this, _1), "write"); +} + +void bcd_t::handle_read(command_t cmd) +{ + pending_reads.push(cmd); +} + +void bcd_t::handle_write(command_t cmd) +{ + canonical_terminal_t::write(cmd.payload()); +} + +void bcd_t::tick() +{ + int ch; + if (!pending_reads.empty() && (ch = canonical_terminal_t::read()) != -1) + { + pending_reads.front().respond(0x100 | ch); + pending_reads.pop(); + } +} + +disk_t::disk_t(const char* fn) /* opens fn read/write, registers the read/write commands and records the file size; throws std::runtime_error on failure */ +{ + fd = ::open(fn, O_RDWR); + if (fd < 0) + throw std::runtime_error("could not open " + std::string(fn)); + + register_command(0, std::bind(&disk_t::handle_read, this, _1), "read"); + register_command(1, std::bind(&disk_t::handle_write, this, _1), "write"); + + struct stat st; + if (fstat(fd, &st) < 0) { ::close(fd); /* ~disk_t() does not run when the constructor throws, so release the descriptor here */ + throw std::runtime_error("could not stat " + std::string(fn)); } + + size = st.st_size; + id = "disk size=" + std::to_string(size); +} + +disk_t::~disk_t() +{ + close(fd); +} + +void disk_t::handle_read(command_t cmd) +{ + request_t req; + cmd.memif().read(cmd.payload(), sizeof(req), &req); + + std::vector buf(req.size); + if ((size_t)::pread(fd, &buf[0], buf.size(), req.offset) != req.size) + throw std::runtime_error("could not read " + id + " @ " + std::to_string(req.offset)); + + cmd.memif().write(req.addr, buf.size(), &buf[0]); + cmd.respond(req.tag); +} + +void disk_t::handle_write(command_t cmd) +{ + request_t req; + cmd.memif().read(cmd.payload(), sizeof(req), &req); + + std::vector buf(req.size); + cmd.memif().read(req.addr, buf.size(), &buf[0]); + + if ((size_t)::pwrite(fd, &buf[0], buf.size(), req.offset) != req.size) + throw std::runtime_error("could not write " + id +
" @ " + std::to_string(req.offset)); + + cmd.respond(req.tag); +} + +device_list_t::device_list_t() /* every slot starts out pointing at null_device; the table is indexed by device number, hence MAX_DEVICES */ + : devices(command_t::MAX_DEVICES, &null_device), num_devices(0) +{ +} + +void device_list_t::register_device(device_t* dev) +{ + num_devices++; + assert(num_devices < command_t::MAX_DEVICES); + devices[num_devices-1] = dev; +} + +void device_list_t::handle_command(command_t cmd) +{ + devices[cmd.device()]->handle_command(cmd); +} + +void device_list_t::tick() +{ + for (size_t i = 0; i < num_devices; i++) + devices[i]->tick(); +} diff --git a/fesvr/device.h b/fesvr/device.h new file mode 100644 index 0000000000..1387b745ff --- /dev/null +++ b/fesvr/device.h @@ -0,0 +1,118 @@ +#ifndef _DEVICE_H +#define _DEVICE_H + +#include +#include +#include +#include +#include + +class memif_t; + +class command_t +{ + public: + typedef std::function callback_t; + command_t(memif_t& memif, uint64_t tohost, callback_t cb) + : _memif(memif), tohost(tohost), cb(cb) {} + + memif_t& memif() { return _memif; } + uint8_t device() { return tohost >> 56; } + uint8_t cmd() { return tohost >> 48; } + uint64_t payload() { return tohost << 16 >> 16; } + void respond(uint64_t resp) { cb((tohost >> 48 << 48) | (resp << 16 >> 16)); } + + static const size_t MAX_COMMANDS = 256; + static const size_t MAX_DEVICES = 256; + + private: + memif_t& _memif; + uint64_t tohost; + callback_t cb; +}; + +class device_t +{ + public: + device_t(); + virtual ~device_t() {} + virtual const char* identity() = 0; + virtual void tick() {} + + void handle_command(command_t cmd); + + protected: + typedef std::function command_func_t; + void register_command(size_t, command_func_t, const char*); + + private: + device_t& operator = (const device_t&); // disallow + device_t(const device_t&); // disallow + + static const size_t IDENTITY_SIZE = 64; + void handle_null_command(command_t cmd); + void handle_identify(command_t cmd); + + std::vector command_handlers; + std::vector command_names; +}; + +class bcd_t : public
device_t +{ + public: + bcd_t(); + const char* identity() { return "bcd"; } + void tick(); + + private: + void handle_read(command_t cmd); + void handle_write(command_t cmd); + + std::queue pending_reads; +}; + +class disk_t : public device_t +{ + public: + disk_t(const char* fn); + ~disk_t(); + const char* identity() { return id.c_str(); } + + private: + struct request_t + { + uint64_t addr; + uint64_t offset; + uint64_t size; + uint64_t tag; + }; + + void handle_read(command_t cmd); + void handle_write(command_t cmd); + + std::string id; + size_t size; + int fd; +}; + +class null_device_t : public device_t +{ + public: + const char* identity() { return ""; } +}; + +class device_list_t +{ + public: + device_list_t(); + void register_device(device_t* dev); + void handle_command(command_t cmd); + void tick(); + + private: + std::vector devices; + null_device_t null_device; + size_t num_devices; +}; + +#endif diff --git a/fesvr/dtm.cc b/fesvr/dtm.cc new file mode 100644 index 0000000000..418ac63abd --- /dev/null +++ b/fesvr/dtm.cc @@ -0,0 +1,645 @@ +#include "dtm.h" +#include "debug_defines.h" +#include "encoding.h" +#include +#include +#include +#include +#include +#include + +#define RV_X(x, s, n) \ + (((x) >> (s)) & ((1 << (n)) - 1)) +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_STYPE_IMM(x) \ + ((RV_X(x, 0, 5) << 7) | (RV_X(x, 5, 7) << 25)) +#define ENCODE_SBTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_UJTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) + +#define LOAD(xlen, dst, base, imm) \ + (((xlen) == 64 ? 0x00003003 : 0x00002003) \ + | ((dst) << 7) | ((base) << 15) | (uint32_t)ENCODE_ITYPE_IMM(imm)) +#define STORE(xlen, src, base, imm) \ + (((xlen) == 64 ? 
0x00003023 : 0x00002023) \ + | ((src) << 20) | ((base) << 15) | (uint32_t)ENCODE_STYPE_IMM(imm)) +#define JUMP(there, here) (0x6f | (uint32_t)ENCODE_UJTYPE_IMM((there) - (here))) +#define BNE(r1, r2, there, here) (0x1063 | ((r1) << 15) | ((r2) << 20) | (uint32_t)ENCODE_SBTYPE_IMM((there) - (here))) +#define ADDI(dst, src, imm) (0x13 | ((dst) << 7) | ((src) << 15) | (uint32_t)ENCODE_ITYPE_IMM(imm)) +#define SRL(dst, src, sh) (0x5033 | ((dst) << 7) | ((src) << 15) | ((sh) << 20)) +#define FENCE_I 0x100f +#define EBREAK 0x00100073 +#define X0 0 +#define S0 8 +#define S1 9 + +#define AC_AR_REGNO(x) ((0x1000 | x) << AC_ACCESS_REGISTER_REGNO_OFFSET) +#define AC_AR_SIZE(x) (((x == 128)? 4 : (x == 64 ? 3 : 2)) << AC_ACCESS_REGISTER_SIZE_OFFSET) + +#define WRITE 1 +#define SET 2 +#define CLEAR 3 +#define CSRRx(type, dst, csr, src) (0x73 | ((type) << 12) | ((dst) << 7) | ((src) << 15) | (uint32_t)((csr) << 20)) + +#define get_field(reg, mask) (((reg) & (mask)) / ((mask) & ~((mask) << 1))) +#define set_field(reg, mask, val) (((reg) & ~(mask)) | (((val) * ((mask) & ~((mask) << 1))) & (mask))) + +#define RUN_AC_OR_DIE(a, b, c, d, e) { \ + uint32_t cmderr = run_abstract_command(a, b, c, d, e); \ + if (cmderr) { \ + die(cmderr); \ + } \ + } + +uint32_t dtm_t::do_command(dtm_t::req r) +{ + req_buf = r; + target->switch_to(); + assert(resp_buf.resp == 0); + return resp_buf.data; +} + +uint32_t dtm_t::read(uint32_t addr) +{ + return do_command((req){addr, 1, 0}); +} + +uint32_t dtm_t::write(uint32_t addr, uint32_t data) +{ + return do_command((req){addr, 2, data}); +} + +void dtm_t::nop() +{ + do_command((req){0, 0, 0}); +} + +void dtm_t::select_hart(int hartsel) { + int dmcontrol = read(DMI_DMCONTROL); + write (DMI_DMCONTROL, set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel)); + current_hart = hartsel; +} + +int dtm_t::enumerate_harts() { + int max_hart = (1 << DMI_DMCONTROL_HARTSEL_LENGTH) - 1; + write(DMI_DMCONTROL, set_field(read(DMI_DMCONTROL), DMI_DMCONTROL_HARTSEL, 
max_hart)); + read(DMI_DMSTATUS); + max_hart = get_field(read(DMI_DMCONTROL), DMI_DMCONTROL_HARTSEL); + + int hartsel; + for (hartsel = 0; hartsel <= max_hart; hartsel++) { + select_hart(hartsel); + int dmstatus = read(DMI_DMSTATUS); + if (get_field(dmstatus, DMI_DMSTATUS_ANYNONEXISTENT)) + break; + } + return hartsel; +} + +void dtm_t::halt(int hartsel) +{ + if (running) { + write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + } + + int dmcontrol = DMI_DMCONTROL_HALTREQ | DMI_DMCONTROL_DMACTIVE; + dmcontrol = set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel); + write(DMI_DMCONTROL, dmcontrol); + int dmstatus; + do { + dmstatus = read(DMI_DMSTATUS); + } while(get_field(dmstatus, DMI_DMSTATUS_ALLHALTED) == 0); + dmcontrol &= ~DMI_DMCONTROL_HALTREQ; + write(DMI_DMCONTROL, dmcontrol); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + current_hart = hartsel; +} + +void dtm_t::resume(int hartsel) +{ + int dmcontrol = DMI_DMCONTROL_RESUMEREQ | DMI_DMCONTROL_DMACTIVE; + dmcontrol = set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel); + write(DMI_DMCONTROL, dmcontrol); + int dmstatus; + do { + dmstatus = read(DMI_DMSTATUS); + } while (get_field(dmstatus, DMI_DMSTATUS_ALLRESUMEACK) == 0); + dmcontrol &= ~DMI_DMCONTROL_RESUMEREQ; + write(DMI_DMCONTROL, dmcontrol); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + current_hart = hartsel; + + if (running) { + write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE); + // Read dmstatus to avoid back-to-back writes to dmcontrol. 
+ read(DMI_DMSTATUS); + } +} + +uint64_t dtm_t::save_reg(unsigned regno) +{ + uint32_t data[xlen/(8*4)]; + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | AC_AR_SIZE(xlen) | AC_AR_REGNO(regno); + RUN_AC_OR_DIE(command, 0, 0, data, xlen / (8*4)); + + uint64_t result = data[0]; + if (xlen > 32) { + result |= ((uint64_t)data[1]) << 32; + } + return result; +} + +void dtm_t::restore_reg(unsigned regno, uint64_t val) +{ + uint32_t data[xlen/(8*4)]; + data[0] = (uint32_t) val; + if (xlen > 32) { + data[1] = (uint32_t) (val >> 32); + } + + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(regno); + + RUN_AC_OR_DIE(command, 0, 0, data, xlen / (8*4)); + +} + +uint32_t dtm_t::run_abstract_command(uint32_t command, + const uint32_t program[], size_t program_n, + uint32_t data[], size_t data_n) +{ + assert(program_n <= ram_words); + assert(data_n <= data_words); + + for (size_t i = 0; i < program_n; i++) { + write(DMI_PROGBUF0 + i, program[i]); + } + + if (get_field(command, AC_ACCESS_REGISTER_WRITE) && + get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { + for (size_t i = 0; i < data_n; i++) { + write(DMI_DATA0 + i, data[i]); + } + } + + write(DMI_COMMAND, command); + + // Wait for not busy and then check for error. 
+ uint32_t abstractcs; + do { + abstractcs = read(DMI_ABSTRACTCS); + } while (abstractcs & DMI_ABSTRACTCS_BUSY); + + if ((get_field(command, AC_ACCESS_REGISTER_WRITE) == 0) && + get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { + for (size_t i = 0; i < data_n; i++){ + data[i] = read(DMI_DATA0 + i); + } + } + + return get_field(abstractcs, DMI_ABSTRACTCS_CMDERR); + +} + +size_t dtm_t::chunk_align() +{ + return xlen / 8; +} + +void dtm_t::read_chunk(uint64_t taddr, size_t len, void* dst) +{ + uint32_t prog[ram_words]; + uint32_t data[data_words]; + + uint8_t * curr = (uint8_t*) dst; + + halt(current_hart); + + uint64_t s0 = save_reg(S0); + uint64_t s1 = save_reg(S1); + + prog[0] = LOAD(xlen, S1, S0, 0); + prog[1] = ADDI(S0, S0, xlen/8); + prog[2] = EBREAK; + + data[0] = (uint32_t) taddr; + if (xlen > 32) { + data[1] = (uint32_t) (taddr >> 32); + } + + // Write s0 with the address, then execute program buffer. + // This will get S1 with the data and increment s0. + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_ACCESS_REGISTER_POSTEXEC | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S0); + + RUN_AC_OR_DIE(command, prog, 3, data, xlen/(4*8)); + + // TODO: could use autoexec here. 
+ for (size_t i = 0; i < (len * 8 / xlen); i++){ + command = AC_ACCESS_REGISTER_TRANSFER | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S1); + if ((i + 1) < (len * 8 / xlen)) { + command |= AC_ACCESS_REGISTER_POSTEXEC; + } + + RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8)); + + memcpy(curr, data, xlen/8); + curr += xlen/8; + } + + restore_reg(S0, s0); + restore_reg(S1, s1); + + resume(current_hart); + +} + +void dtm_t::write_chunk(uint64_t taddr, size_t len, const void* src) +{ + uint32_t prog[ram_words]; + uint32_t data[data_words]; + + const uint8_t * curr = (const uint8_t*) src; + + halt(current_hart); + + uint64_t s0 = save_reg(S0); + uint64_t s1 = save_reg(S1); + + prog[0] = STORE(xlen, S1, S0, 0); + prog[1] = ADDI(S0, S0, xlen/8); + prog[2] = EBREAK; + + data[0] = (uint32_t) taddr; + if (xlen > 32) { + data[1] = (uint32_t) (taddr >> 32); + } + + // Write the program (not used yet). + // Write s0 with the address. + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S0); + + RUN_AC_OR_DIE(command, prog, 3, data, xlen/(4*8)); + + // Use Autoexec for more than one word of transfer. + // Write S1 with data, then execution stores S1 to + // 0(S0) and increments S0. + // Each time we write XLEN bits. + memcpy(data, curr, xlen/8); + curr += xlen/8; + + command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_POSTEXEC | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S1); + + RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8)); + + uint32_t abstractcs; + for (size_t i = 1; i < (len * 8 / xlen); i++){ + if (i == 1) { + write(DMI_ABSTRACTAUTO, 1 << DMI_ABSTRACTAUTO_AUTOEXECDATA_OFFSET); + } + memcpy(data, curr, xlen/8); + curr += xlen/8; + if (xlen == 64) { + write(DMI_DATA0 + 1, data[1]); + } + write(DMI_DATA0, data[0]); //Triggers a command w/ autoexec. 
+ + do { + abstractcs = read(DMI_ABSTRACTCS); + } while (abstractcs & DMI_ABSTRACTCS_BUSY); + if ( get_field(abstractcs, DMI_ABSTRACTCS_CMDERR)) { + die(get_field(abstractcs, DMI_ABSTRACTCS_CMDERR)); + } + } + if ((len * 8 / xlen) > 1) { + write(DMI_ABSTRACTAUTO, 0); + } + + restore_reg(S0, s0); + restore_reg(S1, s1); + resume(current_hart); +} + +void dtm_t::die(uint32_t cmderr) +{ + const char * codes[] = { + "OK", + "BUSY", + "NOT_SUPPORTED", + "EXCEPTION", + "HALT/RESUME" + }; + const char * msg; + if (cmderr < (sizeof(codes) / sizeof(*codes))){ + msg = codes[cmderr]; + } else { + msg = "OTHER"; + } + //throw std::runtime_error("Debug Abstract Command Error #" + std::to_string(cmderr) + "(" + msg + ")"); + printf("ERROR: %s:%d, Debug Abstract Command Error #%d (%s)", __FILE__, __LINE__, cmderr, msg); + printf("ERROR: %s:%d, Should die, but allowing simulation to continue and fail.", __FILE__, __LINE__); + write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR); + +} + +void dtm_t::clear_chunk(uint64_t taddr, size_t len) +{ + uint32_t prog[ram_words]; + uint32_t data[data_words]; + + halt(current_hart); + uint64_t s0 = save_reg(S0); + uint64_t s1 = save_reg(S1); + + uint32_t command; + + // S0 = Addr + data[0] = (uint32_t) taddr; + data[1] = (uint32_t) (taddr >> 32); + command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S0); + RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8)); + + // S1 = Addr + len, loop until S0 = S1 + prog[0] = STORE(xlen, X0, S0, 0); + prog[1] = ADDI(S0, S0, xlen/8); + prog[2] = BNE(S0, S1, 0*4, 2*4); + prog[3] = EBREAK; + + data[0] = (uint32_t) (taddr + len); + data[1] = (uint32_t) ((taddr + len) >> 32); + command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S1) | + AC_ACCESS_REGISTER_POSTEXEC; + RUN_AC_OR_DIE(command, prog, 4, data, xlen/(4*8)); + + restore_reg(S0, s0); + restore_reg(S1, s1); + + resume(current_hart); +} + +uint64_t 
dtm_t::write_csr(unsigned which, uint64_t data) +{ + return modify_csr(which, data, WRITE); +} + +uint64_t dtm_t::set_csr(unsigned which, uint64_t data) +{ + return modify_csr(which, data, SET); +} + +uint64_t dtm_t::clear_csr(unsigned which, uint64_t data) +{ + return modify_csr(which, data, CLEAR); +} + +uint64_t dtm_t::read_csr(unsigned which) +{ + return set_csr(which, 0); +} + +uint64_t dtm_t::modify_csr(unsigned which, uint64_t data, uint32_t type) /* halts the current hart, applies a CSRRW/CSRRS/CSRRC (type = WRITE/SET/CLEAR) to CSR `which` with operand `data`, resumes, and returns the value read back from the data registers */ +{ + halt(current_hart); + + // This code just uses DSCRATCH to save S0 + // and data_base to do the transfer so we don't + // need to run more commands to save and restore + // S0. + uint32_t prog[] = { + CSRRx(WRITE, S0, CSR_DSCRATCH0, S0), + LOAD(xlen, S0, X0, data_base), + CSRRx(type, S0, which, S0), + STORE(xlen, S0, X0, data_base), + CSRRx(WRITE, S0, CSR_DSCRATCH0, S0), + EBREAK + }; + + //TODO: Use transfer = 0. For now both HW and OpenOCD + // ignore transfer bit, so use "store to X0" NOOP. + // We sort of need this anyway because run_abstract_command + // needs the DATA to be written so may as well use the WRITE flag. + + uint32_t adata[] = {(uint32_t) data, + (uint32_t) (data >> 32)}; + + uint32_t command = AC_ACCESS_REGISTER_POSTEXEC | + AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(X0); + + RUN_AC_OR_DIE(command, prog, sizeof(prog) / sizeof(*prog), adata, xlen/(4*8)); + + uint64_t res = read(DMI_DATA0);// low 32 bits of the result + if (xlen == 64) + res |= ((uint64_t) read(DMI_DATA0 + 1)) << 32;// high 32 bits: must be shifted into place, not OR-ed into the low word + + resume(current_hart); + return res; +} + +size_t dtm_t::chunk_max_size() +{ + // Arbitrary choice. 4k Page size seems reasonable. + return 4096; +} + +uint32_t dtm_t::get_xlen() +{ + // Attempt to read S0 to find out what size it is. + // You could also attempt to run code, but you need to save registers + // to do that anyway. If what you really want to do is figure out + // the size of S0 so you can save it later, then do that.
+ uint32_t command = AC_ACCESS_REGISTER_TRANSFER | AC_AR_REGNO(S0); + uint32_t cmderr; + + const uint32_t prog[] = {}; + uint32_t data[] = {}; + + cmderr = run_abstract_command(command | AC_AR_SIZE(128), prog, 0, data, 0); + if (cmderr == 0){ + throw std::runtime_error("FESVR DTM Does not support 128-bit"); + abort(); + return 128; + } + write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR); + + cmderr = run_abstract_command(command | AC_AR_SIZE(64), prog, 0, data, 0); + if (cmderr == 0){ + return 64; + } + write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR); + + cmderr = run_abstract_command(command | AC_AR_SIZE(32), prog, 0, data, 0); + if (cmderr == 0){ + return 32; + } + + throw std::runtime_error("FESVR DTM can't determine XLEN. Aborting"); +} + +void dtm_t::fence_i() +{ + halt(current_hart); + + const uint32_t prog[] = { + FENCE_I, + EBREAK + }; + + //TODO: Use the transfer = 0. + uint32_t command = AC_ACCESS_REGISTER_POSTEXEC | + AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(X0); + + RUN_AC_OR_DIE(command, prog, sizeof(prog)/sizeof(*prog), 0, 0); + + resume(current_hart); + +} + +void host_thread_main(void* arg) +{ + ((dtm_t*)arg)->producer_thread(); +} + +void dtm_t::reset() +{ + for (int hartsel = 0; hartsel < num_harts; hartsel ++ ){ + select_hart(hartsel); + // this command also does a halt and resume + fence_i(); + // after this command, the hart will run from _start. + write_csr(0x7b1, get_entry_point()); + } + // In theory any hart can handle the memory accesses, + // this will enforce that hart 0 handles them. + select_hart(0); + read(DMI_DMSTATUS); +} + +void dtm_t::idle() +{ + for (int idle_cycles = 0; idle_cycles < max_idle_cycles; idle_cycles++) + nop(); +} + +void dtm_t::producer_thread() +{ + // Learn about the Debug Module and assert things we + // depend on in this code. + + // Enable the debugger. + write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE); + // Poll until the debugger agrees it's enabled. 
+ while ((read(DMI_DMCONTROL) & DMI_DMCONTROL_DMACTIVE) == 0) ; + + // These are checked every time we run an abstract command. + uint32_t abstractcs = read(DMI_ABSTRACTCS); + ram_words = get_field(abstractcs, DMI_ABSTRACTCS_PROGSIZE); + data_words = get_field(abstractcs, DMI_ABSTRACTCS_DATACOUNT); + + // These things are only needed for the 'modify_csr' function. + // That could be re-written to not use these at some performance + // overhead. + uint32_t hartinfo = read(DMI_HARTINFO); + assert(get_field(hartinfo, DMI_HARTINFO_NSCRATCH) > 0); + assert(get_field(hartinfo, DMI_HARTINFO_DATAACCESS)); + + data_base = get_field(hartinfo, DMI_HARTINFO_DATAADDR); + + num_harts = enumerate_harts(); + halt(0); + // Note: We don't support systems with heterogeneous XLEN. + // It's possible to do this at the cost of extra cycles. + xlen = get_xlen(); + resume(0); + + running = true; + + htif_t::run(); + + while (true) + nop(); +} + +void dtm_t::start_host_thread() +{ + req_wait = false; + resp_wait = false; + + target = context_t::current(); + host.init(host_thread_main, this); + host.switch_to(); +} + +dtm_t::dtm_t(int argc, char** argv) + : htif_t(argc, argv), running(false) +{ + start_host_thread(); +} + +dtm_t::~dtm_t() +{ +} + +void dtm_t::tick( + bool req_ready, + bool resp_valid, + resp resp_bits) +{ + if (!resp_wait) { + if (!req_wait) { + req_wait = true; + } else if (req_ready) { + req_wait = false; + resp_wait = true; + } + } + + if (resp_valid) { + assert(resp_wait); + resp_wait = false; + + resp_buf = resp_bits; + // update the target with the current context + target = context_t::current(); + host.switch_to(); + } +} + +void dtm_t::return_resp(resp resp_bits){ + resp_buf = resp_bits; + target = context_t::current(); + host.switch_to(); +} diff --git a/fesvr/dtm.h b/fesvr/dtm.h new file mode 100644 index 0000000000..fbf161efec --- /dev/null +++ b/fesvr/dtm.h @@ -0,0 +1,115 @@ +#ifndef _ROCKET_DTM_H +#define _ROCKET_DTM_H + +#include "htif.h" +#include "context.h" 
+#include +#include +#include +#include +#include +#include + +// abstract debug transport module +class dtm_t : public htif_t +{ + public: + dtm_t(int argc, char**argv); + ~dtm_t(); + + struct req { + uint32_t addr; + uint32_t op; + uint32_t data; + }; + + struct resp { + uint32_t resp; + uint32_t data; + }; + + void tick( + bool req_ready, + bool resp_valid, + resp resp_bits + ); + // Akin to tick, but the target thread returns a response on every invocation + void return_resp( + resp resp_bits + ); + + + bool req_valid() { return req_wait; } + req req_bits() { return req_buf; } + bool resp_ready() { return true; } + + uint32_t read(uint32_t addr); + uint32_t write(uint32_t addr, uint32_t data); + void nop(); + + uint64_t read_csr(unsigned which); + uint64_t write_csr(unsigned which, uint64_t data); + uint64_t clear_csr(unsigned which, uint64_t data); + uint64_t set_csr(unsigned which, uint64_t data); + void fence_i(); + + void producer_thread(); + + protected: + virtual void read_chunk(addr_t taddr, size_t len, void* dst) override; + virtual void write_chunk(addr_t taddr, size_t len, const void* src) override; + virtual void clear_chunk(addr_t taddr, size_t len) override; + virtual size_t chunk_align() override; + virtual size_t chunk_max_size() override; + virtual void reset() override; + virtual void idle() override; + + private: + context_t host; + context_t* target; + pthread_t producer; + sem_t req_produce; + sem_t req_consume; + sem_t resp_produce; + sem_t resp_consume; + req req_buf; + resp resp_buf; + bool running; + + uint32_t run_abstract_command(uint32_t command, const uint32_t program[], size_t program_n, + uint32_t data[], size_t data_n); + + void die(uint32_t cmderr); + void halt(int); + int enumerate_harts(); + void select_hart(int); + void resume(int); + uint64_t save_reg(unsigned regno); + void restore_reg(unsigned regno, uint64_t val); + + uint64_t modify_csr(unsigned which, uint64_t data, uint32_t type); + + bool req_wait; + bool resp_wait; + 
uint32_t data_base; + + uint32_t xlen; + + static const int max_idle_cycles = 10000; + + size_t ram_words; + size_t data_words; + int num_harts; + int current_hart; + + uint32_t get_xlen(); + uint32_t do_command(dtm_t::req r); + + void parse_args(const std::vector& args); + void register_devices(); + void start_host_thread(); + + friend class memif_t; +}; + +#endif diff --git a/fesvr/dummy.cc b/fesvr/dummy.cc new file mode 100644 index 0000000000..a155d3e56c --- /dev/null +++ b/fesvr/dummy.cc @@ -0,0 +1,4 @@ +// See LICENSE for license details. + +// help out poor, C-centric autoconf +extern "C" void libfesvr_is_present() {} diff --git a/fesvr/elf.h b/fesvr/elf.h new file mode 100644 index 0000000000..a213832755 --- /dev/null +++ b/fesvr/elf.h @@ -0,0 +1,132 @@ +// See LICENSE for details. + +#ifndef _ELF_H +#define _ELF_H + +#include + +#define ET_EXEC 2 +#define EM_RISCV 243 +#define EM_NONE 0 +#define EV_CURRENT 1 + +#define IS_ELF(hdr) \ + ((hdr).e_ident[0] == 0x7f && (hdr).e_ident[1] == 'E' && \ + (hdr).e_ident[2] == 'L' && (hdr).e_ident[3] == 'F') + +#define IS_ELF32(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 1) +#define IS_ELF64(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 2) +#define IS_ELFLE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 1) +#define IS_ELFBE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 2) +#define IS_ELF_EXEC(hdr) (IS_ELF(hdr) && (hdr).e_type == ET_EXEC) +#define IS_ELF_RISCV(hdr) (IS_ELF(hdr) && (hdr).e_machine == EM_RISCV) +#define IS_ELF_EM_NONE(hdr) (IS_ELF(hdr) && (hdr).e_machine == EM_NONE) +#define IS_ELF_VCURRENT(hdr) (IS_ELF(hdr) && (hdr).e_version == EV_CURRENT) + +#define PT_LOAD 1 + +#define SHT_NOBITS 8 + +typedef struct { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf32_Ehdr; + +typedef 
struct { + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf32_Shdr; + +typedef struct +{ + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} Elf32_Phdr; + +typedef struct +{ + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + uint8_t st_info; + uint8_t st_other; + uint16_t st_shndx; +} Elf32_Sym; + +typedef struct { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf64_Ehdr; + +typedef struct { + uint32_t sh_name; + uint32_t sh_type; + uint64_t sh_flags; + uint64_t sh_addr; + uint64_t sh_offset; + uint64_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint64_t sh_addralign; + uint64_t sh_entsize; +} Elf64_Shdr; + +typedef struct { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} Elf64_Phdr; + +typedef struct { + uint32_t st_name; + uint8_t st_info; + uint8_t st_other; + uint16_t st_shndx; + uint64_t st_value; + uint64_t st_size; +} Elf64_Sym; + +#endif diff --git a/fesvr/elf2hex.cc b/fesvr/elf2hex.cc new file mode 100644 index 0000000000..327cf2d933 --- /dev/null +++ b/fesvr/elf2hex.cc @@ -0,0 +1,47 @@ +// See LICENSE for license details. 
+
+#include <iostream>
+#include "htif_hexwriter.h"
+#include "memif.h"
+#include "elfloader.h"
+
+int main(int argc, char** argv) // elf2hex: load an ELF into a width x depth hex image and print it
+{
+  if(argc < 4 || argc > 5)
+  {
+    std::cerr << "Usage: " << argv[0] << " <width> <depth> <elf_file> [base]" << std::endl;
+    return 1;
+  }
+
+  unsigned width = atoi(argv[1]); // bytes per output row
+  if(width == 0 || (width & (width-1)))
+  {
+    std::cerr << "width must be a power of 2" << std::endl;
+    return 1;
+  }
+
+  unsigned long long int base = 0; // optional address offset subtracted by the hex writer
+  if(argc==5) {
+    base = atoll(argv[4]);
+    if(base & (width-1))
+    {
+      std::cerr << "base must be divisible by width" << std::endl;
+      return 1;
+    }
+  }
+
+  unsigned depth = atoi(argv[2]); // number of output rows
+  if(depth == 0 || (depth & (depth-1)))
+  {
+    std::cerr << "depth must be a power of 2" << std::endl;
+    return 1;
+  }
+
+  htif_hexwriter_t htif(base, width, depth);
+  memif_t memif(&htif);
+  reg_t entry; // filled in by load_elf; not used by this tool
+  load_elf(argv[3], &memif, &entry);
+  std::cout << htif;
+
+  return 0;
+}
diff --git a/fesvr/elfloader.cc b/fesvr/elfloader.cc
new file mode 100644
index 0000000000..a4bae1e7d7
--- /dev/null
+++ b/fesvr/elfloader.cc
@@ -0,0 +1,94 @@
+// See LICENSE for license details.
+ +#include "elf.h" +#include "memif.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +std::map load_elf(const char* fn, memif_t* memif, reg_t* entry) +{ + int fd = open(fn, O_RDONLY); + struct stat s; + assert(fd != -1); + if (fstat(fd, &s) < 0) + abort(); + size_t size = s.st_size; + + char* buf = (char*)mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + assert(buf != MAP_FAILED); + close(fd); + + assert(size >= sizeof(Elf64_Ehdr)); + const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf; + assert(IS_ELF32(*eh64) || IS_ELF64(*eh64)); + assert(IS_ELFLE(*eh64)); + assert(IS_ELF_EXEC(*eh64)); + assert(IS_ELF_RISCV(*eh64) || IS_ELF_EM_NONE(*eh64)); + assert(IS_ELF_VCURRENT(*eh64)); + + std::vector zeros; + std::map symbols; + + #define LOAD_ELF(ehdr_t, phdr_t, shdr_t, sym_t, bswap) do { \ + ehdr_t* eh = (ehdr_t*)buf; \ + phdr_t* ph = (phdr_t*)(buf + bswap(eh->e_phoff)); \ + *entry = bswap(eh->e_entry); \ + assert(size >= bswap(eh->e_phoff) + bswap(eh->e_phnum)*sizeof(*ph)); \ + for (unsigned i = 0; i < bswap(eh->e_phnum); i++) { \ + if(bswap(ph[i].p_type) == PT_LOAD && bswap(ph[i].p_memsz)) { \ + if (bswap(ph[i].p_filesz)) { \ + assert(size >= bswap(ph[i].p_offset) + bswap(ph[i].p_filesz)); \ + memif->write(bswap(ph[i].p_paddr), bswap(ph[i].p_filesz), (uint8_t*)buf + bswap(ph[i].p_offset)); \ + } \ + zeros.resize(bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz)); \ + memif->write(bswap(ph[i].p_paddr) + bswap(ph[i].p_filesz), bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz), &zeros[0]); \ + } \ + } \ + shdr_t* sh = (shdr_t*)(buf + bswap(eh->e_shoff)); \ + assert(size >= bswap(eh->e_shoff) + bswap(eh->e_shnum)*sizeof(*sh)); \ + assert(bswap(eh->e_shstrndx) < bswap(eh->e_shnum)); \ + assert(size >= bswap(sh[bswap(eh->e_shstrndx)].sh_offset) + bswap(sh[bswap(eh->e_shstrndx)].sh_size)); \ + char *shstrtab = buf + bswap(sh[bswap(eh->e_shstrndx)].sh_offset); \ + unsigned strtabidx = 0, symtabidx = 
0; \
+    for (unsigned i = 0; i < bswap(eh->e_shnum); i++) { \
+      unsigned max_len = bswap(sh[bswap(eh->e_shstrndx)].sh_size) - bswap(sh[i].sh_name); \
+      assert(bswap(sh[i].sh_name) < bswap(sh[bswap(eh->e_shstrndx)].sh_size)); \
+      assert(strnlen(shstrtab + bswap(sh[i].sh_name), max_len) < max_len); \
+      if (bswap(sh[i].sh_type) == SHT_NOBITS) continue; /* sh_type is an enumeration, not a flag mask; only NOBITS has no file bytes to bounds-check */ \
+      assert(size >= bswap(sh[i].sh_offset) + bswap(sh[i].sh_size)); \
+      if (strcmp(shstrtab + bswap(sh[i].sh_name), ".strtab") == 0) \
+        strtabidx = i; \
+      if (strcmp(shstrtab + bswap(sh[i].sh_name), ".symtab") == 0) \
+        symtabidx = i; \
+    } \
+    if (strtabidx && symtabidx) { /* index 0 doubles as "not found" */ \
+      char* strtab = buf + bswap(sh[strtabidx].sh_offset); \
+      sym_t* sym = (sym_t*)(buf + bswap(sh[symtabidx].sh_offset)); \
+      for (unsigned i = 0; i < bswap(sh[symtabidx].sh_size)/sizeof(sym_t); i++) { \
+        unsigned max_len = bswap(sh[strtabidx].sh_size) - bswap(sym[i].st_name); \
+        assert(bswap(sym[i].st_name) < bswap(sh[strtabidx].sh_size)); \
+        assert(strnlen(strtab + bswap(sym[i].st_name), max_len) < max_len); \
+        symbols[strtab + bswap(sym[i].st_name)] = bswap(sym[i].st_value); \
+      } \
+    } \
+  } while(0)
+
+  if (IS_ELF32(*eh64))
+    LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym, from_le);
+  else
+    LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym, from_le);
+
+  munmap(buf, size); // symbol names were copied into the map's keys, so the mapping can go
+
+  return symbols;
+}
diff --git a/fesvr/elfloader.h b/fesvr/elfloader.h
new file mode 100644
index 0000000000..696ef47849
--- /dev/null
+++ b/fesvr/elfloader.h
@@ -0,0 +1,13 @@
+// See LICENSE for license details.
+ +#ifndef _ELFLOADER_H +#define _ELFLOADER_H + +#include "elf.h" +#include +#include + +class memif_t; +std::map load_elf(const char* fn, memif_t* memif, reg_t* entry); + +#endif diff --git a/fesvr/fesvr.ac b/fesvr/fesvr.ac new file mode 100644 index 0000000000..60e6c57fc3 --- /dev/null +++ b/fesvr/fesvr.ac @@ -0,0 +1 @@ +AC_CHECK_LIB(pthread, pthread_create, [], [AC_MSG_ERROR([libpthread is required])]) diff --git a/fesvr/fesvr.mk.in b/fesvr/fesvr.mk.in new file mode 100644 index 0000000000..30c8bfeb94 --- /dev/null +++ b/fesvr/fesvr.mk.in @@ -0,0 +1,40 @@ +fesvr_hdrs = \ + elf.h \ + elfloader.h \ + htif.h \ + dtm.h \ + memif.h \ + syscall.h \ + context.h \ + htif_pthread.h \ + htif_hexwriter.h \ + option_parser.h \ + term.h \ + device.h \ + rfb.h \ + tsi.h \ + +fesvr_CFLAGS = -fPIC + +fesvr_install_hdrs = $(fesvr_hdrs) + +fesvr_install_lib = yes + +fesvr_srcs = \ + elfloader.cc \ + htif.cc \ + memif.cc \ + dtm.cc \ + syscall.cc \ + device.cc \ + rfb.cc \ + context.cc \ + htif_pthread.cc \ + htif_hexwriter.cc \ + dummy.cc \ + option_parser.cc \ + term.cc \ + tsi.cc \ + +fesvr_install_prog_srcs = \ + elf2hex.cc \ diff --git a/fesvr/fesvr.pc.in b/fesvr/fesvr.pc.in new file mode 100644 index 0000000000..f2d12563b4 --- /dev/null +++ b/fesvr/fesvr.pc.in @@ -0,0 +1,26 @@ +#========================================================================= +# Modular C++ Build System Subproject Package Config +#========================================================================= +# Please read the documenation in 'mcppbs-uguide.txt' for more details +# on how the Modular C++ Build System works. 
+ +#------------------------------------------------------------------------- +# Generic variables +#------------------------------------------------------------------------- + +prefix=@prefix@ +include_dir=${prefix}/include/fesvr +lib_dir=${prefix}/lib + +#------------------------------------------------------------------------- +# Keywords +#------------------------------------------------------------------------- + +Name : fesvr +Version : @PACKAGE_VERSION@ +Description : Frontend Server C/C++ API +Requires : @fesvr_pkcdeps@ +Cflags : -I${include_dir} @CPPFLAGS@ @fesvr_extra_cppflags@ +Libs : -L${lib_dir} @LDFLAGS@ @fesvr_extra_ldflags@ \ + -lfesvr @fesvr_extra_libs@ + diff --git a/fesvr/htif.cc b/fesvr/htif.cc new file mode 100644 index 0000000000..f828494654 --- /dev/null +++ b/fesvr/htif.cc @@ -0,0 +1,371 @@ +// See LICENSE for license details. + +#include "htif.h" +#include "rfb.h" +#include "elfloader.h" +#include "encoding.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Attempt to determine the execution prefix automatically. autoconf + * sets PREFIX, and pconfigure sets __PCONFIGURE__PREFIX. */ +#if !defined(PREFIX) && defined(__PCONFIGURE__PREFIX) +# define PREFIX __PCONFIGURE__PREFIX +#endif + +#ifndef TARGET_ARCH +# define TARGET_ARCH "riscv64-unknown-elf" +#endif + +#ifndef TARGET_DIR +# define TARGET_DIR "/" TARGET_ARCH "/bin/" +#endif + +static volatile bool signal_exit = false; +static void handle_signal(int sig) +{ + if (sig == SIGABRT || signal_exit) // someone set up us the bomb! 
+ exit(-1); + signal_exit = true; + signal(sig, &handle_signal); +} + +htif_t::htif_t() + : mem(this), entry(DRAM_BASE), sig_addr(0), sig_len(0), + tohost_addr(0), fromhost_addr(0), exitcode(0), stopped(false), + syscall_proxy(this) +{ + signal(SIGINT, &handle_signal); + signal(SIGTERM, &handle_signal); + signal(SIGABRT, &handle_signal); // we still want to call static destructors +} + +htif_t::htif_t(int argc, char** argv) : htif_t() +{ + parse_arguments(argc, argv); + register_devices(); +} + +htif_t::htif_t(const std::vector& args) : htif_t() +{ + int argc = args.size() + 1; + char * argv[argc]; + argv[0] = (char *) "htif"; + for (unsigned int i = 0; i < args.size(); i++) { + argv[i+1] = (char *) args[i].c_str(); + } + + parse_arguments(argc, argv); + register_devices(); +} + +htif_t::~htif_t() +{ + for (auto d : dynamic_devices) + delete d; +} + +void htif_t::start() +{ + if (!targs.empty() && targs[0] != "none") + load_program(); + + reset(); +} + +std::map htif_t::load_payload(const std::string& payload, reg_t* entry) +{ + std::string path; + if (access(payload.c_str(), F_OK) == 0) + path = payload; + else if (payload.find('/') == std::string::npos) + { + std::string test_path = PREFIX TARGET_DIR + payload; + if (access(test_path.c_str(), F_OK) == 0) + path = test_path; + } + + if (path.empty()) + throw std::runtime_error( + "could not open " + payload + + " (did you misspell it? 
If VCS, did you forget +permissive/+permissive-off?)"); + + // temporarily construct a memory interface that skips writing bytes + // that have already been preloaded through a sideband + class preload_aware_memif_t : public memif_t { + public: + preload_aware_memif_t(htif_t* htif) : memif_t(htif), htif(htif) {} + + void write(addr_t taddr, size_t len, const void* src) override + { + if (!htif->is_address_preloaded(taddr, len)) + memif_t::write(taddr, len, src); + } + + private: + htif_t* htif; + } preload_aware_memif(this); + + return load_elf(path.c_str(), &preload_aware_memif, entry); +} + +void htif_t::load_program() +{ + std::map symbols = load_payload(targs[0], &entry); + + if (symbols.count("tohost") && symbols.count("fromhost")) { + tohost_addr = symbols["tohost"]; + fromhost_addr = symbols["fromhost"]; + } else { + fprintf(stderr, "warning: tohost and fromhost symbols not in ELF; can't communicate with target\n"); + } + + // detect torture tests so we can print the memory signature at the end + if (symbols.count("begin_signature") && symbols.count("end_signature")) + { + sig_addr = symbols["begin_signature"]; + sig_len = symbols["end_signature"] - sig_addr; + } + + for (auto payload : payloads) + { + reg_t dummy_entry; + load_payload(payload, &dummy_entry); + } + + for (auto i : symbols) + { + auto it = addr2symbol.find(i.second); + if ( it == addr2symbol.end()) + addr2symbol[i.second] = i.first; + } + + return; +} + +const char* htif_t::get_symbol(uint64_t addr) +{ + auto it = addr2symbol.find(addr); + + if(it == addr2symbol.end()) + return nullptr; + + return it->second.c_str(); +} + +void htif_t::stop() +{ + if (!sig_file.empty() && sig_len) // print final torture test signature + { + std::vector buf(sig_len); + mem.read(sig_addr, sig_len, &buf[0]); + + std::ofstream sigs(sig_file); + assert(sigs && "can't open signature file!"); + sigs << std::setfill('0') << std::hex; + + const addr_t incr = 16; + assert(sig_len % incr == 0); + for (addr_t i = 0; i < 
sig_len; i += incr) + { + for (addr_t j = incr; j > 0; j--) + sigs << std::setw(2) << (uint16_t)buf[i+j-1]; + sigs << '\n'; + } + + sigs.close(); + } + + stopped = true; +} + +void htif_t::clear_chunk(addr_t taddr, size_t len) +{ + char zeros[chunk_max_size()]; + memset(zeros, 0, chunk_max_size()); + + for (size_t pos = 0; pos < len; pos += chunk_max_size()) + write_chunk(taddr + pos, std::min(len - pos, chunk_max_size()), zeros); +} + +int htif_t::run() +{ + start(); + + auto enq_func = [](std::queue* q, uint64_t x) { q->push(x); }; + std::queue fromhost_queue; + std::function fromhost_callback = + std::bind(enq_func, &fromhost_queue, std::placeholders::_1); + + if (tohost_addr == 0) { + while (true) + idle(); + } + + while (!signal_exit && exitcode == 0) + { + if (auto tohost = from_le(mem.read_uint64(tohost_addr))) { + mem.write_uint64(tohost_addr, 0); + command_t cmd(mem, tohost, fromhost_callback); + device_list.handle_command(cmd); + } else { + idle(); + } + + device_list.tick(); + + if (!fromhost_queue.empty() && mem.read_uint64(fromhost_addr) == 0) { + mem.write_uint64(fromhost_addr, to_le(fromhost_queue.front())); + fromhost_queue.pop(); + } + } + + stop(); + + return exit_code(); +} + +bool htif_t::done() +{ + return stopped; +} + +int htif_t::exit_code() +{ + return exitcode >> 1; +} + +void htif_t::parse_arguments(int argc, char ** argv) +{ + optind = 0; // reset optind as HTIF may run getopt _after_ others + while (1) { + static struct option long_options[] = { HTIF_LONG_OPTIONS }; + int option_index = 0; + int c = getopt_long(argc, argv, "-h", long_options, &option_index); + + if (c == -1) break; + retry: + switch (c) { + case 'h': usage(argv[0]); + throw std::invalid_argument("User queried htif_t help text"); + case HTIF_LONG_OPTIONS_OPTIND: + if (optarg) dynamic_devices.push_back(new rfb_t(atoi(optarg))); + else dynamic_devices.push_back(new rfb_t); + break; + case HTIF_LONG_OPTIONS_OPTIND + 1: + // [TODO] Remove once disks are supported again + 
throw std::invalid_argument("--disk/+disk unsupported (use a ramdisk)"); + dynamic_devices.push_back(new disk_t(optarg)); + break; + case HTIF_LONG_OPTIONS_OPTIND + 2: + sig_file = optarg; + break; + case HTIF_LONG_OPTIONS_OPTIND + 3: + syscall_proxy.set_chroot(optarg); + break; + case HTIF_LONG_OPTIONS_OPTIND + 4: + payloads.push_back(optarg); + break; + case '?': + if (!opterr) + break; + throw std::invalid_argument("Unknown argument (did you mean to enable +permissive parsing?)"); + case 1: { + std::string arg = optarg; + if (arg == "+h" || arg == "+help") { + c = 'h'; + optarg = nullptr; + } + else if (arg == "+rfb") { + c = HTIF_LONG_OPTIONS_OPTIND; + optarg = nullptr; + } + else if (arg.find("+rfb=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND; + optarg = optarg + 5; + } + else if (arg.find("+disk=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 1; + optarg = optarg + 6; + } + else if (arg.find("+signature=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 2; + optarg = optarg + 11; + } + else if (arg.find("+chroot=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 3; + optarg = optarg + 8; + } + else if (arg.find("+payload=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 4; + optarg = optarg + 9; + } + else if (arg.find("+permissive-off") == 0) { + if (opterr) + throw std::invalid_argument("Found +permissive-off when not parsing permissively"); + opterr = 1; + break; + } + else if (arg.find("+permissive") == 0) { + if (!opterr) + throw std::invalid_argument("Found +permissive when already parsing permissively"); + opterr = 0; + break; + } + else { + if (!opterr) + break; + else { + optind--; + goto done_processing; + } + } + goto retry; + } + } + } + +done_processing: + while (optind < argc) + targs.push_back(argv[optind++]); + if (!targs.size()) { + usage(argv[0]); + throw std::invalid_argument("No binary specified (Did you forget it? 
Did you forget '+permissive-off' if running with +permissive?)"); + } +} + +void htif_t::register_devices() +{ + device_list.register_device(&syscall_proxy); + device_list.register_device(&bcd); + for (auto d : dynamic_devices) + device_list.register_device(d); +} + +void htif_t::usage(const char * program_name) +{ + printf("Usage: %s [EMULATOR OPTION]... [VERILOG PLUSARG]... [HOST OPTION]... BINARY [TARGET OPTION]...\n ", + program_name); + fputs("\ +Run a BINARY on the Rocket Chip emulator.\n\ +\n\ +Mandatory arguments to long options are mandatory for short options too.\n\ +\n\ +EMULATOR OPTIONS\n\ + Consult emulator.cc if using Verilator or VCS documentation if using VCS\n\ + for available options.\n\ +EMUALTOR VERILOG PLUSARGS\n\ + Consult generated-src*/*.plusArgs for available options\n\ +", stdout); + fputs("\n" HTIF_USAGE_OPTIONS, stdout); +} diff --git a/fesvr/htif.h b/fesvr/htif.h new file mode 100644 index 0000000000..5b16a60db4 --- /dev/null +++ b/fesvr/htif.h @@ -0,0 +1,126 @@ +// See LICENSE for license details. 
+ +#ifndef __HTIF_H +#define __HTIF_H + +#include "memif.h" +#include "syscall.h" +#include "device.h" +#include +#include +#include + +class htif_t : public chunked_memif_t +{ + public: + htif_t(); + htif_t(int argc, char** argv); + htif_t(const std::vector& args); + virtual ~htif_t(); + + virtual void start(); + virtual void stop(); + + int run(); + bool done(); + int exit_code(); + + virtual memif_t& memif() { return mem; } + + protected: + virtual void reset() = 0; + + virtual void read_chunk(addr_t taddr, size_t len, void* dst) = 0; + virtual void write_chunk(addr_t taddr, size_t len, const void* src) = 0; + virtual void clear_chunk(addr_t taddr, size_t len); + + virtual size_t chunk_align() = 0; + virtual size_t chunk_max_size() = 0; + + virtual std::map load_payload(const std::string& payload, reg_t* entry); + virtual void load_program(); + virtual void idle() {} + + const std::vector& host_args() { return hargs; } + + reg_t get_entry_point() { return entry; } + + // indicates that the initial program load can skip writing this address + // range to memory, because it has already been loaded through a sideband + virtual bool is_address_preloaded(addr_t taddr, size_t len) { return false; } + + // Given an address, return symbol from addr2symbol map + const char* get_symbol(uint64_t addr); + + private: + void parse_arguments(int argc, char ** argv); + void register_devices(); + void usage(const char * program_name); + + memif_t mem; + reg_t entry; + bool writezeros; + std::vector hargs; + std::vector targs; + std::string sig_file; + addr_t sig_addr; // torture + addr_t sig_len; // torture + addr_t tohost_addr; + addr_t fromhost_addr; + int exitcode; + bool stopped; + + device_list_t device_list; + syscall_t syscall_proxy; + bcd_t bcd; + std::vector dynamic_devices; + std::vector payloads; + + const std::vector& target_args() { return targs; } + + std::map addr2symbol; + + friend class memif_t; + friend class syscall_t; +}; + +/* Alignment guide for emulator.cc 
options: + -x, --long-option Description with max 80 characters --------------->\n\ + +plus-arg-equivalent\n\ + */ +#define HTIF_USAGE_OPTIONS \ +"HOST OPTIONS\n\ + -h, --help Display this help and exit\n\ + +h, +help\n\ + +permissive The host will ignore any unparsed options up until\n\ + +permissive-off (Only needed for VCS)\n\ + +permissive-off Stop ignoring options. This is mandatory if using\n\ + +permissive (Only needed for VCS)\n\ + --rfb=DISPLAY Add new remote frame buffer on display DISPLAY\n\ + +rfb=DISPLAY to be accessible on 5900 + DISPLAY (default = 0)\n\ + --signature=FILE Write torture test signature to FILE\n\ + +signature=FILE\n\ + --chroot=PATH Use PATH as location of syscall-servicing binaries\n\ + +chroot=PATH\n\ + --payload=PATH Load PATH memory as an additional ELF payload\n\ + +payload=PATH\n\ +\n\ +HOST OPTIONS (currently unsupported)\n\ + --disk=DISK Add DISK device. Use a ramdisk since this isn't\n\ + +disk=DISK supported\n\ +\n\ +TARGET (RISC-V BINARY) OPTIONS\n\ + These are the options passed to the program executing on the emulated RISC-V\n\ + microprocessor.\n" + +#define HTIF_LONG_OPTIONS_OPTIND 1024 +#define HTIF_LONG_OPTIONS \ +{"help", no_argument, 0, 'h' }, \ +{"rfb", optional_argument, 0, HTIF_LONG_OPTIONS_OPTIND }, \ +{"disk", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 1 }, \ +{"signature", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 2 }, \ +{"chroot", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 3 }, \ +{"payload", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 4 }, \ +{0, 0, 0, 0} + +#endif // __HTIF_H diff --git a/fesvr/htif_hexwriter.cc b/fesvr/htif_hexwriter.cc new file mode 100644 index 0000000000..e4811b3bee --- /dev/null +++ b/fesvr/htif_hexwriter.cc @@ -0,0 +1,76 @@ +// See LICENSE for license details. 
+
+#include <iostream>
+#include <assert.h>
+#include "htif_hexwriter.h"
+
+htif_hexwriter_t::htif_hexwriter_t(size_t b, size_t w, size_t d)
+  : base(b), width(w), depth(d)
+{
+}
+
+void htif_hexwriter_t::read_chunk(addr_t taddr, size_t len, void* vdst) // copy len bytes (a multiple of width) at taddr into vdst
+{
+  taddr -= base;
+
+  assert(len % chunk_align() == 0);
+  assert(taddr < width*depth);
+  assert(taddr+len <= width*depth);
+
+  uint8_t* dst = (uint8_t*)vdst;
+  while(len)
+  {
+    if(mem[taddr/width].size() == 0) // rows are created zero-filled on first touch
+      mem[taddr/width].resize(width,0);
+
+    for(size_t j = 0; j < width; j++)
+      dst[j] = mem[taddr/width][j];
+
+    len -= width;
+    taddr += width;
+    dst += width;
+  }
+}
+
+void htif_hexwriter_t::write_chunk(addr_t taddr, size_t len, const void* vsrc) // store len bytes (a multiple of width) from vsrc at taddr
+{
+  taddr -= base;
+
+  assert(len % chunk_align() == 0);
+  assert(taddr < width*depth);
+  assert(taddr+len <= width*depth);
+
+  const uint8_t* src = (const uint8_t*)vsrc;
+  while(len)
+  {
+    if(mem[taddr/width].size() == 0) // rows are created zero-filled on first touch
+      mem[taddr/width].resize(width,0);
+
+    for(size_t j = 0; j < width; j++)
+      mem[taddr/width][j] = *src++; // consume src so each row gets its own bytes, as read_chunk does with dst
+
+    len -= width;
+    taddr += width;
+  }
+}
+
+std::ostream& operator<< (std::ostream& o, const htif_hexwriter_t& h) // one text line per row, bytes printed from index width-1 down to 0
+{
+  std::ios_base::fmtflags flags = o.setf(std::ios::hex,std::ios::basefield);
+
+  for(size_t addr = 0; addr < h.depth; addr++)
+  {
+    auto i = h.mem.find(addr);
+    if(i == h.mem.end()) // row never touched: emit zeros
+      for(size_t j = 0; j < h.width; j++)
+        o << "00";
+    else
+      for(size_t j = 0; j < h.width; j++)
+        o << ((i->second[h.width-j-1] >> 4) & 0xF) << (i->second[h.width-j-1] & 0xF);
+    o << std::endl;
+  }
+
+  o.flags(flags); // restore the caller's format flags exactly; setf() would only OR them in
+
+  return o;
+}
diff --git a/fesvr/htif_hexwriter.h b/fesvr/htif_hexwriter.h
new file mode 100644
index 0000000000..725616626e
--- /dev/null
+++ b/fesvr/htif_hexwriter.h
@@ -0,0 +1,32 @@
+// See LICENSE for license details.
+ +#ifndef __HTIF_HEXWRITER_H +#define __HTIF_HEXWRITER_H + +#include +#include +#include +#include "memif.h" + +class htif_hexwriter_t : public chunked_memif_t +{ +public: + htif_hexwriter_t(size_t b, size_t w, size_t d); + +protected: + size_t base; + size_t width; + size_t depth; + std::map > mem; + + void read_chunk(addr_t taddr, size_t len, void* dst); + void write_chunk(addr_t taddr, size_t len, const void* src); + void clear_chunk(addr_t taddr, size_t len) {} + + size_t chunk_max_size() { return width; } + size_t chunk_align() { return width; } + + friend std::ostream& operator<< (std::ostream&, const htif_hexwriter_t&); +}; + +#endif // __HTIF_HEXWRITER_H diff --git a/fesvr/htif_pthread.cc b/fesvr/htif_pthread.cc new file mode 100644 index 0000000000..b9e3832b94 --- /dev/null +++ b/fesvr/htif_pthread.cc @@ -0,0 +1,66 @@ +// See LICENSE for license details. + +#include "htif_pthread.h" +#include +#include + +void htif_pthread_t::thread_main(void* arg) +{ + htif_pthread_t* htif = static_cast(arg); + htif->run(); + while (true) + htif->target->switch_to(); +} + +htif_pthread_t::htif_pthread_t(int argc, char** argv) + : htif_t(argc, argv) +{ + target = context_t::current(); + host.init(thread_main, this); +} + +htif_pthread_t::~htif_pthread_t() +{ +} + +ssize_t htif_pthread_t::read(void* buf, size_t max_size) +{ + while (th_data.size() == 0) + target->switch_to(); + + size_t s = std::min(max_size, th_data.size()); + std::copy(th_data.begin(), th_data.begin() + s, (char*)buf); + th_data.erase(th_data.begin(), th_data.begin() + s); + + return s; +} + +ssize_t htif_pthread_t::write(const void* buf, size_t size) +{ + ht_data.insert(ht_data.end(), (const char*)buf, (const char*)buf + size); + return size; +} + +void htif_pthread_t::send(const void* buf, size_t size) +{ + th_data.insert(th_data.end(), (const char*)buf, (const char*)buf + size); +} + +void htif_pthread_t::recv(void* buf, size_t size) +{ + while (!this->recv_nonblocking(buf, size)) + ; +} + +bool 
htif_pthread_t::recv_nonblocking(void* buf, size_t size) +{ + if (ht_data.size() < size) + { + host.switch_to(); + return false; + } + + std::copy(ht_data.begin(), ht_data.begin() + size, (char*)buf); + ht_data.erase(ht_data.begin(), ht_data.begin() + size); + return true; +} diff --git a/fesvr/htif_pthread.h b/fesvr/htif_pthread.h new file mode 100644 index 0000000000..c00c38230c --- /dev/null +++ b/fesvr/htif_pthread.h @@ -0,0 +1,38 @@ +// See LICENSE for license details. + +#ifndef _HTIF_PTHREAD_H +#define _HTIF_PTHREAD_H + +#include "htif.h" +#include "context.h" +#include + +class htif_pthread_t : public htif_t +{ + public: + htif_pthread_t(int argc, char** argv); + virtual ~htif_pthread_t(); + + // target inteface + void send(const void* buf, size_t size); + void recv(void* buf, size_t size); + bool recv_nonblocking(void* buf, size_t size); + + protected: + // host interface + virtual ssize_t read(void* buf, size_t max_size); + virtual ssize_t write(const void* buf, size_t size); + + virtual size_t chunk_align() { return 64; } + virtual size_t chunk_max_size() { return 1024; } + + private: + context_t host; + context_t* target; + std::deque th_data; + std::deque ht_data; + + static void thread_main(void* htif); +}; + +#endif diff --git a/fesvr/memif.cc b/fesvr/memif.cc new file mode 100644 index 0000000000..fd9629144f --- /dev/null +++ b/fesvr/memif.cc @@ -0,0 +1,183 @@ +// See LICENSE for license details. 
+ +#include +#include +#include +#include +#include "memif.h" + +void memif_t::read(addr_t addr, size_t len, void* bytes) +{ + size_t align = cmemif->chunk_align(); + if (len && (addr & (align-1))) + { + size_t this_len = std::min(len, align - size_t(addr & (align-1))); + uint8_t chunk[align]; + + cmemif->read_chunk(addr & ~(align-1), align, chunk); + memcpy(bytes, chunk + (addr & (align-1)), this_len); + + bytes = (char*)bytes + this_len; + addr += this_len; + len -= this_len; + } + + if (len & (align-1)) + { + size_t this_len = len & (align-1); + size_t start = len - this_len; + uint8_t chunk[align]; + + cmemif->read_chunk(addr + start, align, chunk); + memcpy((char*)bytes + start, chunk, this_len); + + len -= this_len; + } + + // now we're aligned + for (size_t pos = 0; pos < len; pos += cmemif->chunk_max_size()) + cmemif->read_chunk(addr + pos, std::min(cmemif->chunk_max_size(), len - pos), (char*)bytes + pos); +} + +void memif_t::write(addr_t addr, size_t len, const void* bytes) +{ + size_t align = cmemif->chunk_align(); + if (len && (addr & (align-1))) + { + size_t this_len = std::min(len, align - size_t(addr & (align-1))); + uint8_t chunk[align]; + + cmemif->read_chunk(addr & ~(align-1), align, chunk); + memcpy(chunk + (addr & (align-1)), bytes, this_len); + cmemif->write_chunk(addr & ~(align-1), align, chunk); + + bytes = (char*)bytes + this_len; + addr += this_len; + len -= this_len; + } + + if (len & (align-1)) + { + size_t this_len = len & (align-1); + size_t start = len - this_len; + uint8_t chunk[align]; + + cmemif->read_chunk(addr + start, align, chunk); + memcpy(chunk, (char*)bytes + start, this_len); + cmemif->write_chunk(addr + start, align, chunk); + + len -= this_len; + } + + // now we're aligned + bool all_zero = len != 0; + for (size_t i = 0; i < len; i++) + all_zero &= ((const char*)bytes)[i] == 0; + + if (all_zero) { + cmemif->clear_chunk(addr, len); + } else { + size_t max_chunk = cmemif->chunk_max_size(); + for (size_t pos = 0; pos < len; 
pos += max_chunk) + cmemif->write_chunk(addr + pos, std::min(max_chunk, len - pos), (char*)bytes + pos); + } +} + +#define MEMIF_READ_FUNC \ + if(addr & (sizeof(val)-1)) \ + throw std::runtime_error("misaligned address"); \ + this->read(addr, sizeof(val), &val); \ + return val + +#define MEMIF_WRITE_FUNC \ + if(addr & (sizeof(val)-1)) \ + throw std::runtime_error("misaligned address"); \ + this->write(addr, sizeof(val), &val) + +uint8_t memif_t::read_uint8(addr_t addr) +{ + uint8_t val; + MEMIF_READ_FUNC; +} + +int8_t memif_t::read_int8(addr_t addr) +{ + int8_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint8(addr_t addr, uint8_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int8(addr_t addr, int8_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint16_t memif_t::read_uint16(addr_t addr) +{ + uint16_t val; + MEMIF_READ_FUNC; +} + +int16_t memif_t::read_int16(addr_t addr) +{ + int16_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint16(addr_t addr, uint16_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int16(addr_t addr, int16_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint32_t memif_t::read_uint32(addr_t addr) +{ + uint32_t val; + MEMIF_READ_FUNC; +} + +int32_t memif_t::read_int32(addr_t addr) +{ + int32_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint32(addr_t addr, uint32_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int32(addr_t addr, int32_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint64_t memif_t::read_uint64(addr_t addr) +{ + uint64_t val; + MEMIF_READ_FUNC; +} + +int64_t memif_t::read_int64(addr_t addr) +{ + int64_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint64(addr_t addr, uint64_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int64(addr_t addr, int64_t val) +{ + MEMIF_WRITE_FUNC; +} diff --git a/fesvr/memif.h b/fesvr/memif.h new file mode 100644 index 0000000000..3854d664c8 --- /dev/null +++ b/fesvr/memif.h @@ -0,0 +1,62 @@ +// See LICENSE for license details. 
+ +#ifndef __MEMIF_H +#define __MEMIF_H + +#include +#include + +typedef uint64_t reg_t; +typedef int64_t sreg_t; +typedef reg_t addr_t; + +class chunked_memif_t +{ +public: + virtual void read_chunk(addr_t taddr, size_t len, void* dst) = 0; + virtual void write_chunk(addr_t taddr, size_t len, const void* src) = 0; + virtual void clear_chunk(addr_t taddr, size_t len) = 0; + + virtual size_t chunk_align() = 0; + virtual size_t chunk_max_size() = 0; +}; + +class memif_t +{ +public: + memif_t(chunked_memif_t* _cmemif) : cmemif(_cmemif) {} + virtual ~memif_t(){} + + // read and write byte arrays + virtual void read(addr_t addr, size_t len, void* bytes); + virtual void write(addr_t addr, size_t len, const void* bytes); + + // read and write 8-bit words + virtual uint8_t read_uint8(addr_t addr); + virtual int8_t read_int8(addr_t addr); + virtual void write_uint8(addr_t addr, uint8_t val); + virtual void write_int8(addr_t addr, int8_t val); + + // read and write 16-bit words + virtual uint16_t read_uint16(addr_t addr); + virtual int16_t read_int16(addr_t addr); + virtual void write_uint16(addr_t addr, uint16_t val); + virtual void write_int16(addr_t addr, int16_t val); + + // read and write 32-bit words + virtual uint32_t read_uint32(addr_t addr); + virtual int32_t read_int32(addr_t addr); + virtual void write_uint32(addr_t addr, uint32_t val); + virtual void write_int32(addr_t addr, int32_t val); + + // read and write 64-bit words + virtual uint64_t read_uint64(addr_t addr); + virtual int64_t read_int64(addr_t addr); + virtual void write_uint64(addr_t addr, uint64_t val); + virtual void write_int64(addr_t addr, int64_t val); + +protected: + chunked_memif_t* cmemif; +}; + +#endif // __MEMIF_H diff --git a/fesvr/option_parser.cc b/fesvr/option_parser.cc new file mode 100644 index 0000000000..72daec40ef --- /dev/null +++ b/fesvr/option_parser.cc @@ -0,0 +1,51 @@ +// See LICENSE for license details. 
+ +#include "option_parser.h" +#include +#include +#include +#include + +void option_parser_t::option(char c, const char* s, int arg, std::function action) +{ + opts.push_back(option_t(c, s, arg, action)); +} + +const char* const* option_parser_t::parse(const char* const* argv0) +{ + assert(argv0); + const char* const* argv = argv0 + 1; + for (const char* opt; (opt = *argv) != NULL && opt[0] == '-'; argv++) + { + bool found = false; + for (auto it = opts.begin(); !found && it != opts.end(); it++) + { + size_t slen = it->str ? strlen(it->str) : 0; + bool chr_match = opt[1] != '-' && it->chr && opt[1] == it->chr; + bool str_match = opt[1] == '-' && slen && strncmp(opt+2, it->str, slen) == 0; + if (chr_match || (str_match && (opt[2+slen] == '=' || opt[2+slen] == '\0'))) + { + const char* optarg = + chr_match ? (opt[2] ? &opt[2] : NULL) : + opt[2+slen] ? &opt[3+slen] : + it->arg ? *(++argv) : NULL; + if (optarg && !it->arg) + error("no argument allowed for option", *argv0, opt); + if (!optarg && it->arg) + error("argument required for option", *argv0, opt); + it->func(optarg); + found = true; + } + } + if (!found) + error("unrecognized option", *argv0, opt); + } + return argv; +} + +void option_parser_t::error(const char* msg, const char* argv0, const char* arg) +{ + fprintf(stderr, "%s: %s %s\n", argv0, msg, arg ? arg : ""); + if (helpmsg) helpmsg(); + exit(1); +} diff --git a/fesvr/option_parser.h b/fesvr/option_parser.h new file mode 100644 index 0000000000..b2cb8edf9d --- /dev/null +++ b/fesvr/option_parser.h @@ -0,0 +1,31 @@ +// See LICENSE for license details. 
+ +#ifndef _OPTION_PARSER_H +#define _OPTION_PARSER_H + +#include +#include + +class option_parser_t +{ + public: + option_parser_t() : helpmsg(0) {} + void help(void (*helpm)(void)) { helpmsg = helpm; } + void option(char c, const char* s, int arg, std::function action); + const char* const* parse(const char* const* argv0); + private: + struct option_t + { + char chr; + const char* str; + int arg; + std::function func; + option_t(char chr, const char* str, int arg, std::function func) + : chr(chr), str(str), arg(arg), func(func) {} + }; + std::vector opts; + void (*helpmsg)(void); + void error(const char* msg, const char* argv0, const char* arg); +}; + +#endif diff --git a/fesvr/rfb.cc b/fesvr/rfb.cc new file mode 100644 index 0000000000..2594a1b871 --- /dev/null +++ b/fesvr/rfb.cc @@ -0,0 +1,230 @@ +#include "rfb.h" +#include "memif.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +rfb_t::rfb_t(int display) + : sockfd(-1), afd(-1), + memif(0), addr(0), width(0), height(0), bpp(0), display(display), + thread(pthread_self()), fb1(0), fb2(0), read_pos(0), + lock(PTHREAD_MUTEX_INITIALIZER) +{ + register_command(0, std::bind(&rfb_t::handle_configure, this, _1), "configure"); + register_command(1, std::bind(&rfb_t::handle_set_address, this, _1), "set_address"); +} + +void* rfb_thread_main(void* arg) +{ + ((rfb_t*)arg)->thread_main(); + return 0; +} + +void rfb_t::thread_main() +{ + pthread_mutex_lock(&lock); + + int port = 5900 + display; + sockfd = socket(PF_INET, SOCK_STREAM, 0); + if (sockfd < 0) + throw std::runtime_error("could not acquire tcp socket"); + + struct sockaddr_in saddr, caddr; + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = INADDR_ANY; + saddr.sin_port = htons(port); + if (bind(sockfd, (struct sockaddr*)&saddr, sizeof(saddr)) < 0) + throw std::runtime_error("could not bind to port " + std::to_string(port)); + if (listen(sockfd, 0) < 0) + throw 
std::runtime_error("could not listen on port " + std::to_string(port)); + + socklen_t clen = sizeof(caddr); + afd = accept(sockfd, (struct sockaddr*)&caddr, &clen); + if (afd < 0) + throw std::runtime_error("could not accept connection"); + + std::string version = "RFB 003.003\n"; + write(version); + if (read() != version) + throw std::runtime_error("bad client version"); + + write(str(uint32_t(htonl(1)))); + + read(); // clientinit + + std::string serverinit; + serverinit += str(uint16_t(htons(width))); + serverinit += str(uint16_t(htons(height))); + serverinit += pixel_format(); + std::string name = "RISC-V"; + serverinit += str(uint32_t(htonl(name.length()))); + serverinit += name; + write(serverinit); + + pthread_mutex_unlock(&lock); + + while (memif == NULL) + sched_yield(); + + while (memif != NULL) + { + std::string s = read(); + if (s.length() < 4) + break; //throw std::runtime_error("bad command"); + + switch (s[0]) + { + case 0: set_pixel_format(s); break; + case 2: set_encodings(s); break; + case 3: break; + } + } + + pthread_mutex_lock(&lock); + close(afd); + close(sockfd); + afd = -1; + sockfd = -1; + pthread_mutex_unlock(&lock); + + thread_main(); +} + +rfb_t::~rfb_t() +{ + memif = 0; + if (!pthread_equal(pthread_self(), thread)) + pthread_join(thread, 0); + delete [] fb1; + delete [] fb2; +} + +void rfb_t::set_encodings(const std::string& s) +{ + uint16_t n = htons(*(uint16_t*)&s[2]); + for (size_t b = s.length(); b < 4U+4U*n; b += read().length()); +} + +void rfb_t::set_pixel_format(const std::string& s) +{ + if (s.length() != 20 || s.substr(4, 16) != pixel_format()) + throw std::runtime_error("bad pixel format"); +} + +void rfb_t::fb_update(const std::string& s) +{ + std::string u; + u += str(uint8_t(0)); + u += str(uint8_t(0)); + u += str(uint16_t(htons(1))); + u += str(uint16_t(htons(0))); + u += str(uint16_t(htons(0))); + u += str(uint16_t(htons(width))); + u += str(uint16_t(htons(height))); + u += str(uint32_t(htonl(0))); + u += 
std::string((char*)fb1, fb_bytes()); + + try + { + write(u); + } + catch (std::runtime_error& e) + { + } +} + +void rfb_t::tick() +{ + if (fb_bytes() == 0 || memif == NULL) + return; + + memif->read(addr + read_pos, FB_ALIGN, const_cast(fb2 + read_pos)); + read_pos = (read_pos + FB_ALIGN) % fb_bytes(); + if (read_pos == 0) + { + std::swap(fb1, fb2); + if (pthread_mutex_trylock(&lock) == 0) + { + fb_update(""); + pthread_mutex_unlock(&lock); + } + } +} + +std::string rfb_t::pixel_format() +{ + int red_bits = 8, green_bits = 8, blue_bits = 8; + int bpp = red_bits + green_bits + blue_bits; + while (bpp & (bpp-1)) bpp++; + + std::string fmt; + fmt += str(uint8_t(bpp)); + fmt += str(uint8_t(red_bits + green_bits + blue_bits)); + fmt += str(uint8_t(0)); // little-endian + fmt += str(uint8_t(1)); // true color + fmt += str(uint16_t(htons((1<> 16; + + bpp = cmd.payload() >> 32; + if (bpp != 32) + throw std::runtime_error("rfb requires 32 bpp true color"); + + if (fb_bytes() % FB_ALIGN != 0) + throw std::runtime_error("rfb size must be a multiple of " + std::to_string(FB_ALIGN)); + + fb1 = new char[fb_bytes()]; + fb2 = new char[fb_bytes()]; + if (pthread_create(&thread, 0, rfb_thread_main, this)) + throw std::runtime_error("could not create thread"); + cmd.respond(1); +} + +void rfb_t::handle_set_address(command_t cmd) +{ + addr = cmd.payload(); + if (addr % FB_ALIGN != 0) + throw std::runtime_error("rfb address must be " + std::to_string(FB_ALIGN) + "-byte aligned"); + memif = &cmd.memif(); + cmd.respond(1); +} diff --git a/fesvr/rfb.h b/fesvr/rfb.h new file mode 100644 index 0000000000..263663a24d --- /dev/null +++ b/fesvr/rfb.h @@ -0,0 +1,53 @@ +#ifndef _RFB_H +#define _RFB_H + +#include "device.h" +#include "memif.h" +#include + +// remote frame buffer +class rfb_t : public device_t +{ + public: + rfb_t(int display = 0); + ~rfb_t(); + void tick(); + std::string name() { return "RISC-V"; } + const char* identity() { return "rfb"; } + + private: + template + std::string 
str(T x) + { + return std::string((char*)&x, sizeof(x)); + } + size_t fb_bytes() { return size_t(width) * height * bpp/8; } + void thread_main(); + friend void* rfb_thread_main(void*); + std::string pixel_format(); + void fb_update(const std::string& s); + void set_encodings(const std::string& s); + void set_pixel_format(const std::string& s); + void write(const std::string& s); + std::string read(); + void handle_configure(command_t cmd); + void handle_set_address(command_t cmd); + + int sockfd; + int afd; + memif_t* memif; + reg_t addr; + uint16_t width; + uint16_t height; + uint16_t bpp; + int display; + pthread_t thread; + volatile char* volatile fb1; + volatile char* volatile fb2; + size_t read_pos; + pthread_mutex_t lock; + + static const int FB_ALIGN = 256; +}; + +#endif diff --git a/fesvr/syscall.cc b/fesvr/syscall.cc new file mode 100644 index 0000000000..f0bdd259bd --- /dev/null +++ b/fesvr/syscall.cc @@ -0,0 +1,395 @@ +// See LICENSE for license details. + +#include "syscall.h" +#include "htif.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +#define RISCV_AT_FDCWD -100 + +struct riscv_stat +{ + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint64_t rdev; + uint64_t __pad1; + uint64_t size; + uint32_t blksize; + uint32_t __pad2; + uint64_t blocks; + uint64_t atime; + uint64_t __pad3; + uint64_t mtime; + uint64_t __pad4; + uint64_t ctime; + uint64_t __pad5; + uint32_t __unused4; + uint32_t __unused5; + + riscv_stat(const struct stat& s) + : dev(s.st_dev), ino(s.st_ino), mode(s.st_mode), nlink(s.st_nlink), + uid(s.st_uid), gid(s.st_gid), rdev(s.st_rdev), __pad1(0), + size(s.st_size), blksize(s.st_blksize), __pad2(0), + blocks(s.st_blocks), atime(s.st_atime), __pad3(0), + mtime(s.st_mtime), __pad4(0), ctime(s.st_ctime), __pad5(0), + __unused4(0), __unused5(0) {} +}; + 
+syscall_t::syscall_t(htif_t* htif) + : htif(htif), memif(&htif->memif()), table(2048) +{ + table[17] = &syscall_t::sys_getcwd; + table[25] = &syscall_t::sys_fcntl; + table[34] = &syscall_t::sys_mkdirat; + table[35] = &syscall_t::sys_unlinkat; + table[37] = &syscall_t::sys_linkat; + table[38] = &syscall_t::sys_renameat; + table[46] = &syscall_t::sys_ftruncate; + table[48] = &syscall_t::sys_faccessat; + table[49] = &syscall_t::sys_chdir; + table[56] = &syscall_t::sys_openat; + table[57] = &syscall_t::sys_close; + table[62] = &syscall_t::sys_lseek; + table[63] = &syscall_t::sys_read; + table[64] = &syscall_t::sys_write; + table[67] = &syscall_t::sys_pread; + table[68] = &syscall_t::sys_pwrite; + table[79] = &syscall_t::sys_fstatat; + table[80] = &syscall_t::sys_fstat; + table[93] = &syscall_t::sys_exit; + table[1039] = &syscall_t::sys_lstat; + table[2011] = &syscall_t::sys_getmainvars; + + register_command(0, std::bind(&syscall_t::handle_syscall, this, _1), "syscall"); + + int stdin_fd = dup(0), stdout_fd0 = dup(1), stdout_fd1 = dup(1); + if (stdin_fd < 0 || stdout_fd0 < 0 || stdout_fd1 < 0) + throw std::runtime_error("could not dup stdin/stdout"); + + fds.alloc(stdin_fd); // stdin -> stdin + fds.alloc(stdout_fd0); // stdout -> stdout + fds.alloc(stdout_fd1); // stderr -> stdout +} + +std::string syscall_t::do_chroot(const char* fn) +{ + if (!chroot.empty() && *fn == '/') + return chroot + fn; + return fn; +} + +std::string syscall_t::undo_chroot(const char* fn) +{ + if (chroot.empty()) + return fn; + if (strncmp(fn, chroot.c_str(), chroot.size()) == 0 + && (chroot.back() == '/' || fn[chroot.size()] == '/')) + return fn + chroot.size() - (chroot.back() == '/'); + return "/"; +} + +void syscall_t::handle_syscall(command_t cmd) +{ + if (cmd.payload() & 1) // test pass/fail + { + htif->exitcode = cmd.payload(); + if (htif->exit_code()) + std::cerr << "*** FAILED *** (tohost = " << htif->exit_code() << ")" << std::endl; + return; + } + else // proxied system call + 
dispatch(cmd.payload()); + + cmd.respond(1); +} + +reg_t syscall_t::sys_exit(reg_t code, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + htif->exitcode = code << 1 | 1; + return 0; +} + +static reg_t sysret_errno(sreg_t ret) +{ + return ret == -1 ? -errno : ret; +} + +reg_t syscall_t::sys_read(reg_t fd, reg_t pbuf, reg_t len, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + ssize_t ret = read(fds.lookup(fd), &buf[0], len); + reg_t ret_errno = sysret_errno(ret); + if (ret > 0) + memif->write(pbuf, ret, &buf[0]); + return ret_errno; +} + +reg_t syscall_t::sys_pread(reg_t fd, reg_t pbuf, reg_t len, reg_t off, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + ssize_t ret = pread(fds.lookup(fd), &buf[0], len, off); + reg_t ret_errno = sysret_errno(ret); + if (ret > 0) + memif->write(pbuf, ret, &buf[0]); + return ret_errno; +} + +reg_t syscall_t::sys_write(reg_t fd, reg_t pbuf, reg_t len, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + memif->read(pbuf, len, &buf[0]); + reg_t ret = sysret_errno(write(fds.lookup(fd), &buf[0], len)); + return ret; +} + +reg_t syscall_t::sys_pwrite(reg_t fd, reg_t pbuf, reg_t len, reg_t off, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + memif->read(pbuf, len, &buf[0]); + reg_t ret = sysret_errno(pwrite(fds.lookup(fd), &buf[0], len, off)); + return ret; +} + +reg_t syscall_t::sys_close(reg_t fd, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + if (close(fds.lookup(fd)) < 0) + return sysret_errno(-1); + fds.dealloc(fd); + return 0; +} + +reg_t syscall_t::sys_lseek(reg_t fd, reg_t ptr, reg_t dir, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(lseek(fds.lookup(fd), ptr, dir)); +} + +reg_t syscall_t::sys_fstat(reg_t fd, reg_t pbuf, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + struct stat buf; + reg_t ret = sysret_errno(fstat(fds.lookup(fd), &buf)); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, 
sizeof(rbuf), &rbuf); + } + return ret; +} + +reg_t syscall_t::sys_fcntl(reg_t fd, reg_t cmd, reg_t arg, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(fcntl(fds.lookup(fd), cmd, arg)); +} + +reg_t syscall_t::sys_ftruncate(reg_t fd, reg_t len, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(ftruncate(fds.lookup(fd), len)); +} + +reg_t syscall_t::sys_lstat(reg_t pname, reg_t len, reg_t pbuf, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + + struct stat buf; + reg_t ret = sysret_errno(lstat(do_chroot(&name[0]).c_str(), &buf)); + riscv_stat rbuf(buf); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, sizeof(rbuf), &rbuf); + } + return ret; +} + +#define AT_SYSCALL(syscall, fd, name, ...) \ + (syscall(fds.lookup(fd), int(fd) == RISCV_AT_FDCWD ? do_chroot(name).c_str() : (name), __VA_ARGS__)) + +reg_t syscall_t::sys_openat(reg_t dirfd, reg_t pname, reg_t len, reg_t flags, reg_t mode, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + int fd = sysret_errno(AT_SYSCALL(openat, dirfd, &name[0], flags, mode)); + if (fd < 0) + return sysret_errno(-1); + return fds.alloc(fd); +} + +reg_t syscall_t::sys_fstatat(reg_t dirfd, reg_t pname, reg_t len, reg_t pbuf, reg_t flags, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + + struct stat buf; + reg_t ret = sysret_errno(AT_SYSCALL(fstatat, dirfd, &name[0], &buf, flags)); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, sizeof(rbuf), &rbuf); + } + return ret; +} + +reg_t syscall_t::sys_faccessat(reg_t dirfd, reg_t pname, reg_t len, reg_t mode, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(faccessat, dirfd, &name[0], mode, 0)); +} + +reg_t syscall_t::sys_renameat(reg_t odirfd, reg_t popath, reg_t olen, reg_t ndirfd, reg_t pnpath, 
reg_t nlen, reg_t a6) +{ + std::vector opath(olen), npath(nlen); + memif->read(popath, olen, &opath[0]); + memif->read(pnpath, nlen, &npath[0]); + return sysret_errno(renameat(fds.lookup(odirfd), int(odirfd) == RISCV_AT_FDCWD ? do_chroot(&opath[0]).c_str() : &opath[0], + fds.lookup(ndirfd), int(ndirfd) == RISCV_AT_FDCWD ? do_chroot(&npath[0]).c_str() : &npath[0])); +} + +reg_t syscall_t::sys_linkat(reg_t odirfd, reg_t poname, reg_t olen, reg_t ndirfd, reg_t pnname, reg_t nlen, reg_t flags) +{ + std::vector oname(olen), nname(nlen); + memif->read(poname, olen, &oname[0]); + memif->read(pnname, nlen, &nname[0]); + return sysret_errno(linkat(fds.lookup(odirfd), int(odirfd) == RISCV_AT_FDCWD ? do_chroot(&oname[0]).c_str() : &oname[0], + fds.lookup(ndirfd), int(ndirfd) == RISCV_AT_FDCWD ? do_chroot(&nname[0]).c_str() : &nname[0], + flags)); +} + +reg_t syscall_t::sys_unlinkat(reg_t dirfd, reg_t pname, reg_t len, reg_t flags, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(unlinkat, dirfd, &name[0], flags)); +} + +reg_t syscall_t::sys_mkdirat(reg_t dirfd, reg_t pname, reg_t len, reg_t mode, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(mkdirat, dirfd, &name[0], mode)); +} + +reg_t syscall_t::sys_getcwd(reg_t pbuf, reg_t size, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(size); + char* ret = getcwd(&buf[0], size); + if (ret == NULL) + return sysret_errno(-1); + std::string tmp = undo_chroot(&buf[0]); + if (size <= tmp.size()) + return -ENOMEM; + memif->write(pbuf, tmp.size() + 1, &tmp[0]); + return tmp.size() + 1; +} + +reg_t syscall_t::sys_getmainvars(reg_t pbuf, reg_t limit, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector args = htif->target_args(); + std::vector words(args.size() + 3); + words[0] = to_le(args.size()); + words[args.size()+1] = 0; // argv[argc] = NULL 
+ words[args.size()+2] = 0; // envp[0] = NULL + + size_t sz = (args.size() + 3) * sizeof(words[0]); + for (size_t i = 0; i < args.size(); i++) + { + words[i+1] = to_le(sz + pbuf); + sz += args[i].length() + 1; + } + + std::vector bytes(sz); + memcpy(&bytes[0], &words[0], sizeof(words[0]) * words.size()); + for (size_t i = 0; i < args.size(); i++) + strcpy(&bytes[from_le(words[i+1]) - pbuf], args[i].c_str()); + + if (bytes.size() > limit) + return -ENOMEM; + + memif->write(pbuf, bytes.size(), &bytes[0]); + return 0; +} + +reg_t syscall_t::sys_chdir(reg_t path, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + size_t size = 0; + while (memif->read_uint8(path + size++)) + ; + std::vector buf(size); + for (size_t offset = 0;; offset++) + { + buf[offset] = memif->read_uint8(path + offset); + if (!buf[offset]) + break; + } + return sysret_errno(chdir(buf.data())); +} + +void syscall_t::dispatch(reg_t mm) +{ + reg_t magicmem[8]; + memif->read(mm, sizeof(magicmem), magicmem); + + reg_t n = from_le(magicmem[0]); + if (n >= table.size() || !table[n]) + throw std::runtime_error("bad syscall #" + std::to_string(n)); + + magicmem[0] = to_le((this->*table[n])(from_le(magicmem[1]), from_le(magicmem[2]), from_le(magicmem[3]), from_le(magicmem[4]), from_le(magicmem[5]), from_le(magicmem[6]), from_le(magicmem[7]))); + + memif->write(mm, sizeof(magicmem), magicmem); +} + +reg_t fds_t::alloc(int fd) +{ + reg_t i; + for (i = 0; i < fds.size(); i++) + if (fds[i] == -1) + break; + + if (i == fds.size()) + fds.resize(i+1); + + fds[i] = fd; + return i; +} + +void fds_t::dealloc(reg_t fd) +{ + fds[fd] = -1; +} + +int fds_t::lookup(reg_t fd) +{ + if (int(fd) == RISCV_AT_FDCWD) + return AT_FDCWD; + return fd >= fds.size() ? 
-1 : fds[fd]; +} + +void syscall_t::set_chroot(const char* where) +{ + char buf1[PATH_MAX], buf2[PATH_MAX]; + + if (getcwd(buf1, sizeof(buf1)) == NULL + || chdir(where) != 0 + || getcwd(buf2, sizeof(buf2)) == NULL + || chdir(buf1) != 0) + { + fprintf(stderr, "could not chroot to %s\n", where); + exit(-1); + } + + chroot = buf2; +} diff --git a/fesvr/syscall.h b/fesvr/syscall.h new file mode 100644 index 0000000000..82946969b3 --- /dev/null +++ b/fesvr/syscall.h @@ -0,0 +1,72 @@ +// See LICENSE for license details. + +#ifndef __SYSCALL_H +#define __SYSCALL_H + +#include "device.h" +#include "memif.h" +#include +#include + +class syscall_t; +typedef reg_t (syscall_t::*syscall_func_t)(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + +class htif_t; +class memif_t; + +class fds_t +{ + public: + reg_t alloc(int fd); + void dealloc(reg_t fd); + int lookup(reg_t fd); + private: + std::vector fds; +}; + +class syscall_t : public device_t +{ + public: + syscall_t(htif_t*); + + void set_chroot(const char* where); + + private: + const char* identity() { return "syscall_proxy"; } + + htif_t* htif; + memif_t* memif; + std::vector table; + fds_t fds; + + void handle_syscall(command_t cmd); + void dispatch(addr_t mm); + + std::string chroot; + std::string do_chroot(const char* fn); + std::string undo_chroot(const char* fn); + + reg_t sys_exit(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_openat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_read(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_pread(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_write(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_pwrite(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_close(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_lseek(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_fstat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_lstat(reg_t, reg_t, reg_t, reg_t, reg_t, 
reg_t, reg_t); + reg_t sys_fstatat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_faccessat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_fcntl(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_ftruncate(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_renameat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_linkat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_unlinkat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_mkdirat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_getcwd(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_getmainvars(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_chdir(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); +}; + +#endif diff --git a/fesvr/term.cc b/fesvr/term.cc new file mode 100644 index 0000000000..c4cba0c07c --- /dev/null +++ b/fesvr/term.cc @@ -0,0 +1,53 @@ +#include "term.h" +#include +#include +#include +#include +#include + +class canonical_termios_t +{ + public: + canonical_termios_t() + : restore_tios(false) + { + if (tcgetattr(0, &old_tios) == 0) + { + struct termios new_tios = old_tios; + new_tios.c_lflag &= ~(ICANON | ECHO); + if (tcsetattr(0, TCSANOW, &new_tios) == 0) + restore_tios = true; + } + } + + ~canonical_termios_t() + { + if (restore_tios) + tcsetattr(0, TCSANOW, &old_tios); + } + private: + struct termios old_tios; + bool restore_tios; +}; + +static canonical_termios_t tios; // exit() will clean up for us + +int canonical_terminal_t::read() +{ + struct pollfd pfd; + pfd.fd = 0; + pfd.events = POLLIN; + int ret = poll(&pfd, 1, 0); + if (ret <= 0 || !(pfd.revents & POLLIN)) + return -1; + + unsigned char ch; + ret = ::read(0, &ch, 1); + return ret <= 0 ? 
-1 : ch; +} + +void canonical_terminal_t::write(char ch) +{ + if (::write(1, &ch, 1) != 1) + abort(); +} diff --git a/fesvr/term.h b/fesvr/term.h new file mode 100644 index 0000000000..7a2c22fc28 --- /dev/null +++ b/fesvr/term.h @@ -0,0 +1,11 @@ +#ifndef _TERM_H +#define _TERM_H + +class canonical_terminal_t +{ + public: + static int read(); + static void write(char); +}; + +#endif diff --git a/fesvr/tsi.cc b/fesvr/tsi.cc new file mode 100644 index 0000000000..5ccafc4b77 --- /dev/null +++ b/fesvr/tsi.cc @@ -0,0 +1,115 @@ +#include "tsi.h" +#include +#include + +#define NHARTS_MAX 16 + +void tsi_t::host_thread(void *arg) +{ + tsi_t *tsi = static_cast(arg); + tsi->run(); + + while (true) + tsi->target->switch_to(); +} + +tsi_t::tsi_t(int argc, char** argv) : htif_t(argc, argv) +{ + target = context_t::current(); + host.init(host_thread, this); +} + +tsi_t::~tsi_t(void) +{ +} + +#define MSIP_BASE 0x2000000 + +// Interrupt core 0 to make it start executing the program in DRAM +void tsi_t::reset() +{ + uint32_t one = 1; + + write_chunk(MSIP_BASE, sizeof(uint32_t), &one); +} + +void tsi_t::push_addr(addr_t addr) +{ + for (int i = 0; i < SAI_ADDR_CHUNKS; i++) { + in_data.push_back(addr & 0xffffffff); + addr = addr >> 32; + } +} + +void tsi_t::push_len(addr_t len) +{ + for (int i = 0; i < SAI_LEN_CHUNKS; i++) { + in_data.push_back(len & 0xffffffff); + len = len >> 32; + } +} + +void tsi_t::read_chunk(addr_t taddr, size_t nbytes, void* dst) +{ + uint32_t *result = static_cast(dst); + size_t len = nbytes / sizeof(uint32_t); + + in_data.push_back(SAI_CMD_READ); + push_addr(taddr); + push_len(len - 1); + + for (size_t i = 0; i < len; i++) { + while (out_data.empty()) + switch_to_target(); + result[i] = out_data.front(); + out_data.pop_front(); + } +} + +void tsi_t::write_chunk(addr_t taddr, size_t nbytes, const void* src) +{ + const uint32_t *src_data = static_cast(src); + size_t len = nbytes / sizeof(uint32_t); + + in_data.push_back(SAI_CMD_WRITE); + push_addr(taddr); + 
push_len(len - 1); + + in_data.insert(in_data.end(), src_data, src_data + len); +} + +void tsi_t::send_word(uint32_t word) +{ + out_data.push_back(word); +} + +uint32_t tsi_t::recv_word(void) +{ + uint32_t word = in_data.front(); + in_data.pop_front(); + return word; +} + +bool tsi_t::data_available(void) +{ + return !in_data.empty(); +} + +void tsi_t::switch_to_host(void) +{ + host.switch_to(); +} + +void tsi_t::switch_to_target(void) +{ + target->switch_to(); +} + +void tsi_t::tick(bool out_valid, uint32_t out_bits, bool in_ready) +{ + if (out_valid && out_ready()) + out_data.push_back(out_bits); + + if (in_valid() && in_ready) + in_data.pop_front(); +} diff --git a/fesvr/tsi.h b/fesvr/tsi.h new file mode 100644 index 0000000000..825a3a0038 --- /dev/null +++ b/fesvr/tsi.h @@ -0,0 +1,57 @@ +#ifndef __SAI_H +#define __SAI_H + +#include "htif.h" +#include "context.h" + +#include +#include +#include +#include + +#define SAI_CMD_READ 0 +#define SAI_CMD_WRITE 1 + +#define SAI_ADDR_CHUNKS 2 +#define SAI_LEN_CHUNKS 2 + +class tsi_t : public htif_t +{ + public: + tsi_t(int argc, char** argv); + virtual ~tsi_t(); + + bool data_available(); + void send_word(uint32_t word); + uint32_t recv_word(); + void switch_to_host(); + + uint32_t in_bits() { return in_data.front(); } + bool in_valid() { return !in_data.empty(); } + bool out_ready() { return true; } + void tick(bool out_valid, uint32_t out_bits, bool in_ready); + + protected: + void reset() override; + void read_chunk(addr_t taddr, size_t nbytes, void* dst) override; + void write_chunk(addr_t taddr, size_t nbytes, const void* src) override; + void switch_to_target(); + + size_t chunk_align() override { return 4; } + size_t chunk_max_size() override { return 1024; } + + int get_ipi_addrs(addr_t *addrs); + + private: + context_t host; + context_t* target; + std::deque in_data; + std::deque out_data; + + void push_addr(addr_t addr); + void push_len(addr_t len); + + static void host_thread(void *tsi); +}; + +#endif diff 
--git a/riscv-disasm.pc.in b/riscv-disasm.pc.in new file mode 100644 index 0000000000..8e022e930f --- /dev/null +++ b/riscv-disasm.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@prefix@ +libdir=${prefix}/@libdir@ +includedir=${prefix}/@includedir@ + +Name: riscv-disasm +Description: RISC-V disassembler +Version: git +Libs: -Wl,-rpath,${libdir} -L${libdir} -ldisasm +Cflags: -I${includedir} +URL: http://riscv.org/download.html#tab_disasm diff --git a/riscv-dummy_rocc.pc.in b/riscv-dummy_rocc.pc.in deleted file mode 100644 index 31635f6dad..0000000000 --- a/riscv-dummy_rocc.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-dummy_rocc -Description: Example RISC-V ROCC accelerator -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -ldummy_rocc -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-fesvr.pc.in b/riscv-fesvr.pc.in new file mode 100644 index 0000000000..efd7eed1e3 --- /dev/null +++ b/riscv-fesvr.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@prefix@ +libdir=${prefix}/@libdir@ +includedir=${prefix}/@includedir@ + +Name: riscv-fesvr +Description: RISC-V front-end server +Version: git +Libs: -Wl,-rpath,${libdir} -L${libdir} -lfesvr +Cflags: -I${includedir} +URL: http://riscv.org/download.html#tab_fesvr diff --git a/riscv-riscv.pc.in b/riscv-riscv.pc.in deleted file mode 100644 index 5e86b1c448..0000000000 --- a/riscv-riscv.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-riscv -Description: RISC-V -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -lriscv -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-softfloat.pc.in b/riscv-softfloat.pc.in deleted file mode 100644 index 6b18e88441..0000000000 --- a/riscv-softfloat.pc.in +++ /dev/null @@ -1,11 +0,0 @@ 
-prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-softfloat -Description: RISC-V softfloat library -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -lsoftfloat -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-spike.pc.in b/riscv-spike.pc.in deleted file mode 100644 index 007ad392bd..0000000000 --- a/riscv-spike.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-spike -Description: RISC-V spike meta library -Version: git -Depends: riscv-spike_main riscv-riscv riscv-softfloat -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-spike_main.pc.in b/riscv-spike_main.pc.in deleted file mode 100644 index c9b0eccca8..0000000000 --- a/riscv-spike_main.pc.in +++ /dev/null @@ -1,12 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-spike_main -Description: RISC-V ISA simulator library -Version: git -Depends: riscv-riscv riscv-softfloat -Libs: -Wl,-rpath,${libdir} -L${libdir} -lspike_main -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv/arith.h b/riscv/arith.h new file mode 100644 index 0000000000..35dd53051e --- /dev/null +++ b/riscv/arith.h @@ -0,0 +1,123 @@ +// See LICENSE for license details. 
+ +#ifndef _RISCV_ARITH_H +#define _RISCV_ARITH_H + +#include +#include +#include + +inline uint64_t mulhu(uint64_t a, uint64_t b) +{ + uint64_t t; + uint32_t y1, y2, y3; + uint64_t a0 = (uint32_t)a, a1 = a >> 32; + uint64_t b0 = (uint32_t)b, b1 = b >> 32; + + t = a1*b0 + ((a0*b0) >> 32); + y1 = t; + y2 = t >> 32; + + t = a0*b1 + y1; + y1 = t; + + t = a1*b1 + y2 + (t >> 32); + y2 = t; + y3 = t >> 32; + + return ((uint64_t)y3 << 32) | y2; +} + +inline int64_t mulh(int64_t a, int64_t b) +{ + int negate = (a < 0) != (b < 0); + uint64_t res = mulhu(a < 0 ? -a : a, b < 0 ? -b : b); + return negate ? ~res + (a * b == 0) : res; +} + +inline int64_t mulhsu(int64_t a, uint64_t b) +{ + int negate = a < 0; + uint64_t res = mulhu(a < 0 ? -a : a, b); + return negate ? ~res + (a * b == 0) : res; +} + +//ref: https://locklessinc.com/articles/sat_arithmetic/ +template +static inline T sat_add(T x, T y, bool &sat) +{ + UT ux = x; + UT uy = y; + UT res = ux + uy; + sat = false; + int sh = sizeof(T) * 8 - 1; + + /* Calculate overflowed result. (Don't change the sign bit of ux) */ + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + /* Force compiler to use cmovns instruction */ + if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) { + res = ux; + sat = true; + } + + return res; +} + +template +static inline T sat_sub(T x, T y, bool &sat) +{ + UT ux = x; + UT uy = y; + UT res = ux - uy; + sat = false; + int sh = sizeof(T) * 8 - 1; + + /* Calculate overflowed result. 
(Don't change the sign bit of ux) */ + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + /* Force compiler to use cmovns instruction */ + if ((T) ((ux ^ uy) & (ux ^ res)) < 0) { + res = ux; + sat = true; + } + + return res; +} + +template +T sat_addu(T x, T y, bool &sat) +{ + T res = x + y; + sat = false; + + sat = res < x; + res |= -(res < x); + + return res; +} + +template +T sat_subu(T x, T y, bool &sat) +{ + T res = x - y; + sat = false; + + sat = !(res <= x); + res &= -(res <= x); + + return res; +} + +static inline uint64_t extract64(uint64_t val, int pos, int len) +{ + assert(pos >= 0 && len > 0 && len <= 64 - pos); + return (val >> pos) & (~UINT64_C(0) >> (64 - len)); +} + +static inline uint64_t make_mask64(int pos, int len) +{ + assert(pos >= 0 && len > 0 && pos < 64 && len <= 64); + return (UINT64_MAX >> (64 - len)) << pos; +} +#endif diff --git a/riscv/byteorder.h b/riscv/byteorder.h new file mode 100644 index 0000000000..393a70bd7b --- /dev/null +++ b/riscv/byteorder.h @@ -0,0 +1,30 @@ +// See LICENSE for license details. 
+ +#ifndef _RISCV_BYTEORDER_H +#define _RISCV_BYTEORDER_H + +#include "config.h" +#include + +static inline uint8_t swap(uint8_t n) { return n; } +static inline uint16_t swap(uint16_t n) { return __builtin_bswap16(n); } +static inline uint32_t swap(uint32_t n) { return __builtin_bswap32(n); } +static inline uint64_t swap(uint64_t n) { return __builtin_bswap64(n); } +static inline int8_t swap(int8_t n) { return n; } +static inline int16_t swap(int16_t n) { return __builtin_bswap16(n); } +static inline int32_t swap(int32_t n) { return __builtin_bswap32(n); } +static inline int64_t swap(int64_t n) { return __builtin_bswap64(n); } + +#ifdef WORDS_BIGENDIAN +template static inline T from_be(T n) { return n; } +template static inline T to_be(T n) { return n; } +template static inline T from_le(T n) { return swap(n); } +template static inline T to_le(T n) { return swap(n); } +#else +template static inline T from_le(T n) { return n; } +template static inline T to_le(T n) { return n; } +template static inline T from_be(T n) { return swap(n); } +template static inline T to_be(T n) { return swap(n); } +#endif + +#endif diff --git a/riscv/clint.cc b/riscv/clint.cc index 7fd4f0cc1b..aee995bfbb 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -1,10 +1,16 @@ +#include #include "devices.h" #include "processor.h" -clint_t::clint_t(std::vector& procs) - : procs(procs), mtimecmp(procs.size()) +clint_t::clint_t(std::vector& procs, uint64_t freq_hz, bool real_time) + : procs(procs), freq_hz(freq_hz), real_time(real_time), mtime(0), mtimecmp(procs.size()) { - mtime = 0; + struct timeval base; + + gettimeofday(&base, NULL); + + real_time_ref_secs = base.tv_sec; + real_time_ref_usecs = base.tv_usec; } /* 0000 msip hart 0 @@ -23,6 +29,7 @@ clint_t::clint_t(std::vector& procs) bool clint_t::load(reg_t addr, size_t len, uint8_t* bytes) { + increment(0); if (addr >= MSIP_BASE && addr + len <= MSIP_BASE + procs.size()*sizeof(msip_t)) { std::vector msip(procs.size()); for (size_t i = 0; i < 
procs.size(); ++i) @@ -64,7 +71,16 @@ bool clint_t::store(reg_t addr, size_t len, const uint8_t* bytes) void clint_t::increment(reg_t inc) { - mtime += inc; + if (real_time) { + struct timeval now; + uint64_t diff_usecs; + + gettimeofday(&now, NULL); + diff_usecs = ((now.tv_sec - real_time_ref_secs) * 1000000) + (now.tv_usec - real_time_ref_usecs); + mtime = diff_usecs * freq_hz / 1000000; + } else { + mtime += inc; + } for (size_t i = 0; i < procs.size(); i++) { procs[i]->state.mip &= ~MIP_MTIP; if (mtime >= mtimecmp[i]) diff --git a/riscv/common.h b/riscv/common.h index 8ddd9849d8..3c523d00d7 100644 --- a/riscv/common.h +++ b/riscv/common.h @@ -6,4 +6,6 @@ #define likely(x) __builtin_expect(x, 1) #define unlikely(x) __builtin_expect(x, 0) +#define NOINLINE __attribute__ ((noinline)) + #endif diff --git a/riscv/debug_defines.h b/riscv/debug_defines.h index d6ddd4ff1e..e6c2c5d3ea 100644 --- a/riscv/debug_defines.h +++ b/riscv/debug_defines.h @@ -84,8 +84,7 @@ /* * 0: Version described in spec version 0.11. * -* 1: Version described in spec version 0.13 (and later?), which -* reduces the DMI data width to 32 bits. +* 1: Version described in spec version 0.13. * * 15: Version not described in any available version of this spec. */ @@ -134,7 +133,7 @@ * cleared by writing \Fdmireset in \Rdtmcs. * * This indicates that the DM itself responded with an error. -* Note: there are no specified cases in which the DM would +* There are no specified cases in which the DM would * respond with an error, and DMI is not required to support * returning errors. * @@ -145,11 +144,6 @@ * needs to give the target more TCK edges between Update-DR and * Capture-DR. The simplest way to do that is to add extra transitions * in Run-Test/Idle. -* -* (The DTM, DM, and/or component may be in different clock domains, -* so synchronization may be required. 
Some relatively fixed number of -* TCK ticks may be needed for the request to reach the DM, complete, -* and for the response to be synchronized back into the TCK domain.) */ #define DTM_DMI_OP_OFFSET 0 #define DTM_DMI_OP_LENGTH 2 @@ -167,20 +161,28 @@ #define CSR_DCSR_XDEBUGVER_LENGTH 4 #define CSR_DCSR_XDEBUGVER (0xfU << CSR_DCSR_XDEBUGVER_OFFSET) /* -* When 1, {\tt ebreak} instructions in Machine Mode enter Debug Mode. +* 0: {\tt ebreak} instructions in M-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in M-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKM_OFFSET 15 #define CSR_DCSR_EBREAKM_LENGTH 1 #define CSR_DCSR_EBREAKM (0x1U << CSR_DCSR_EBREAKM_OFFSET) /* -* When 1, {\tt ebreak} instructions in Supervisor Mode enter Debug Mode. +* 0: {\tt ebreak} instructions in S-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in S-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKS_OFFSET 13 #define CSR_DCSR_EBREAKS_LENGTH 1 #define CSR_DCSR_EBREAKS (0x1U << CSR_DCSR_EBREAKS_OFFSET) /* -* When 1, {\tt ebreak} instructions in User/Application Mode enter -* Debug Mode. +* 0: {\tt ebreak} instructions in U-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in U-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKU_OFFSET 12 #define CSR_DCSR_EBREAKU_LENGTH 1 @@ -191,9 +193,10 @@ * 1: Interrupts are enabled during single stepping. * * Implementations may hard wire this bit to 0. -* The debugger must read back the value it -* writes to check whether the feature is supported. If not -* supported, interrupt behavior can be emulated by the debugger. +* In that case interrupt behavior can be emulated by the debugger. +* +* The debugger must not change the value of this bit while the hart +* is running. */ #define CSR_DCSR_STEPIE_OFFSET 11 #define CSR_DCSR_STEPIE_LENGTH 1 @@ -201,14 +204,13 @@ /* * 0: Increment counters as usual. 
* -* 1: Don't increment any counters while in Debug Mode or on {\tt -* ebreak} instructions that cause entry into Debug Mode. These -* counters include the {\tt cycle} and {\tt instret} CSRs. This is -* preferred for most debugging scenarios. +* 1: Don't increment any hart-local counters while in Debug Mode or +* on {\tt ebreak} instructions that cause entry into Debug Mode. +* These counters include the {\tt instret} CSR. On single-hart cores +* {\tt cycle} should be stopped, but on multi-hart cores it must keep +* incrementing. * -* An implementation may choose not to support writing to this bit. -* The debugger must read back the value it writes to check whether -* the feature is supported. +* An implementation may hardwire this bit to 0 or 1. */ #define CSR_DCSR_STOPCOUNT_OFFSET 10 #define CSR_DCSR_STOPCOUNT_LENGTH 1 @@ -218,9 +220,7 @@ * * 1: Don't increment any hart-local timers while in Debug Mode. * -* An implementation may choose not to support writing to this bit. -* The debugger must read back the value it writes to check whether -* the feature is supported. +* An implementation may hardwire this bit to 0 or 1. */ #define CSR_DCSR_STOPTIME_OFFSET 9 #define CSR_DCSR_STOPTIME_LENGTH 1 @@ -236,9 +236,16 @@ * * 2: The Trigger Module caused a breakpoint exception. (priority 4) * -* 3: The debugger requested entry to Debug Mode. (priority 2) +* 3: The debugger requested entry to Debug Mode using \Fhaltreq. +* (priority 1) * -* 4: The hart single stepped because \Fstep was set. (priority 1) +* 4: The hart single stepped because \Fstep was set. (priority 0, lowest) +* +* 5: The hart halted directly out of reset due to \Fresethaltreq. It +* is also acceptable to report 3 when this happens. (priority 2) +* +* 6: The hart halted because it's part of a halt group. (priority 5, +* highest) Harts may report 3 for this cause instead. * * Other values are reserved for future use. 
*/ @@ -246,10 +253,11 @@ #define CSR_DCSR_CAUSE_LENGTH 3 #define CSR_DCSR_CAUSE (0x7U << CSR_DCSR_CAUSE_OFFSET) /* -* When 1, \Fmprv in \Rmstatus takes effect during debug mode. -* When 0, it is ignored during debug mode. -* Implementing this bit is optional. -* If not implemented it should be tied to 0. +* 0: \Fmprv in \Rmstatus is ignored in Debug Mode. +* +* 1: \Fmprv in \Rmstatus takes effect in Debug Mode. +* +* Implementing this bit is optional. It may be tied to either 0 or 1. */ #define CSR_DCSR_MPRVEN_OFFSET 4 #define CSR_DCSR_MPRVEN_LENGTH 1 @@ -270,6 +278,9 @@ * If the instruction does not complete due to an exception, * the hart will immediately enter Debug Mode before executing * the trap handler, with appropriate exception registers set. +* +* The debugger must not change the value of this bit while the hart +* is running. */ #define CSR_DCSR_STEP_OFFSET 2 #define CSR_DCSR_STEP_LENGTH 1 @@ -289,14 +300,14 @@ #define CSR_DCSR_PRV (0x3U << CSR_DCSR_PRV_OFFSET) #define CSR_DPC 0x7b1 #define CSR_DPC_DPC_OFFSET 0 -#define CSR_DPC_DPC_LENGTH MXLEN -#define CSR_DPC_DPC (((1L<0, 2->1, 3->2, 4->2 +static unsigned field_width(unsigned n) +{ + unsigned i = 0; + n -= 1; + while (n) { + i++; + n >>= 1; + } + return i; +} + ///////////////////////// debug_module_t -debug_module_t::debug_module_t(sim_t *sim, unsigned progbufsize, unsigned max_bus_master_bits, - bool require_authentication) : - progbufsize(progbufsize), - program_buffer_bytes(4 + 4*progbufsize), - max_bus_master_bits(max_bus_master_bits), - require_authentication(require_authentication), +debug_module_t::debug_module_t(sim_t *sim, const debug_module_config_t &config) : + nprocs(sim->nprocs()), + config(config), + program_buffer_bytes((config.support_impebreak ? 
4 : 0) + 4*config.progbufsize), debug_progbuf_start(debug_data_start - program_buffer_bytes), debug_abstract_start(debug_progbuf_start - debug_abstract_size*4), custom_base(0), - sim(sim) + hartsellen(field_width(sim->nprocs())), + sim(sim), + // The spec lets a debugger select nonexistent harts. Create hart_state for + // them because I'm too lazy to add the code to just ignore accesses. + hart_state(1 << field_width(sim->nprocs())), + hart_array_mask(sim->nprocs()), + rti_remaining(0) { D(fprintf(stderr, "debug_data_start=0x%x\n", debug_data_start)); D(fprintf(stderr, "debug_progbuf_start=0x%x\n", debug_progbuf_start)); D(fprintf(stderr, "debug_abstract_start=0x%x\n", debug_abstract_start)); + assert(nprocs <= 1024); + program_buffer = new uint8_t[program_buffer_bytes]; - memset(halted, 0, sizeof(halted)); memset(debug_rom_flags, 0, sizeof(debug_rom_flags)); - memset(resumeack, 0, sizeof(resumeack)); - memset(havereset, 0, sizeof(havereset)); memset(program_buffer, 0, program_buffer_bytes); - program_buffer[4*progbufsize] = ebreak(); - program_buffer[4*progbufsize+1] = ebreak() >> 8; - program_buffer[4*progbufsize+2] = ebreak() >> 16; - program_buffer[4*progbufsize+3] = ebreak() >> 24; memset(dmdata, 0, sizeof(dmdata)); + if (config.support_impebreak) { + program_buffer[4*config.progbufsize] = ebreak(); + program_buffer[4*config.progbufsize+1] = ebreak() >> 8; + program_buffer[4*config.progbufsize+2] = ebreak() >> 16; + program_buffer[4*config.progbufsize+3] = ebreak() >> 24; + } + write32(debug_rom_whereto, 0, jal(ZERO, debug_abstract_start - DEBUG_ROM_WHERETO)); @@ -60,37 +80,38 @@ debug_module_t::~debug_module_t() void debug_module_t::reset() { + assert(sim->nprocs() > 0); for (unsigned i = 0; i < sim->nprocs(); i++) { processor_t *proc = sim->get_core(i); if (proc) - proc->halt_request = false; + proc->halt_request = proc->HR_NONE; } dmcontrol = {0}; dmstatus = {0}; - dmstatus.impebreak = true; - dmstatus.authenticated = !require_authentication; + 
dmstatus.impebreak = config.support_impebreak; + dmstatus.authenticated = !config.require_authentication; dmstatus.version = 2; abstractcs = {0}; abstractcs.datacount = sizeof(dmdata) / 4; - abstractcs.progbufsize = progbufsize; + abstractcs.progbufsize = config.progbufsize; abstractauto = {0}; sbcs = {0}; - if (max_bus_master_bits > 0) { + if (config.max_bus_master_bits > 0) { sbcs.version = 1; sbcs.asize = sizeof(reg_t) * 8; } - if (max_bus_master_bits >= 64) + if (config.max_bus_master_bits >= 64) sbcs.access64 = true; - if (max_bus_master_bits >= 32) + if (config.max_bus_master_bits >= 32) sbcs.access32 = true; - if (max_bus_master_bits >= 16) + if (config.max_bus_master_bits >= 16) sbcs.access16 = true; - if (max_bus_master_bits >= 8) + if (config.max_bus_master_bits >= 8) sbcs.access8 = true; challenge = random(); @@ -135,8 +156,8 @@ bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes) return true; } - fprintf(stderr, "ERROR: invalid load from debug module: %zd bytes at 0x%016" - PRIx64 "\n", len, addr); + D(fprintf(stderr, "ERROR: invalid load from debug module: %zd bytes at 0x%016" + PRIx64 "\n", len, addr)); return false; } @@ -179,11 +200,24 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) if (addr == DEBUG_ROM_HALTED) { assert (len == 4); - halted[id] = true; + if (!hart_state[id].halted) { + hart_state[id].halted = true; + if (hart_state[id].haltgroup) { + for (unsigned i = 0; i < nprocs; i++) { + if (!hart_state[i].halted && + hart_state[i].haltgroup == hart_state[id].haltgroup) { + processor_t *proc = sim->get_core(i); + proc->halt_request = proc->HR_GROUP; + // TODO: What if the debugger comes and writes dmcontrol before the + // halt occurs? 
+ } + } + } + } if (dmcontrol.hartsel == id) { if (0 == (debug_rom_flags[id] & (1 << DEBUG_ROM_FLAG_GO))){ if (dmcontrol.hartsel == id) { - abstractcs.busy = false; + abstract_command_completed = true; } } } @@ -191,14 +225,15 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) } if (addr == DEBUG_ROM_GOING) { - debug_rom_flags[dmcontrol.hartsel] &= ~(1 << DEBUG_ROM_FLAG_GO); + assert(len == 4); + debug_rom_flags[id] &= ~(1 << DEBUG_ROM_FLAG_GO); return true; } if (addr == DEBUG_ROM_RESUMING) { assert (len == 4); - halted[id] = false; - resumeack[id] = true; + hart_state[id].halted = false; + hart_state[id].resumeack = true; debug_rom_flags[id] &= ~(1 << DEBUG_ROM_FLAG_RESUME); return true; } @@ -210,8 +245,8 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) return true; } - fprintf(stderr, "ERROR: invalid store to debug module: %zd bytes at 0x%016" - PRIx64 "\n", len, addr); + D(fprintf(stderr, "ERROR: invalid store to debug module: %zd bytes at 0x%016" + PRIx64 "\n", len, addr)); return false; } @@ -234,16 +269,25 @@ uint32_t debug_module_t::read32(uint8_t *memory, unsigned int index) return value; } -processor_t *debug_module_t::current_proc() const +processor_t *debug_module_t::processor(unsigned hartid) const { processor_t *proc = NULL; try { - proc = sim->get_core(dmcontrol.hartsel); + proc = sim->get_core(hartid); } catch (const std::out_of_range&) { } return proc; } +bool debug_module_t::hart_selected(unsigned hartid) const +{ + if (dmcontrol.hasel) { + return hartid == dmcontrol.hartsel || hart_array_mask[hartid]; + } else { + return hartid == dmcontrol.hartsel; + } +} + unsigned debug_module_t::sb_access_bits() { return 8 << sbcs.sbaccess; @@ -251,7 +295,7 @@ unsigned debug_module_t::sb_access_bits() void debug_module_t::sb_autoincrement() { - if (!sbcs.autoincrement || !max_bus_master_bits) + if (!sbcs.autoincrement || !config.max_bus_master_bits) return; uint64_t value = sbaddress[0] + sb_access_bits() / 
8; @@ -273,13 +317,13 @@ void debug_module_t::sb_read() { reg_t address = ((uint64_t) sbaddress[1] << 32) | sbaddress[0]; try { - if (sbcs.sbaccess == 0 && max_bus_master_bits >= 8) { + if (sbcs.sbaccess == 0 && config.max_bus_master_bits >= 8) { sbdata[0] = sim->debug_mmu->load_uint8(address); - } else if (sbcs.sbaccess == 1 && max_bus_master_bits >= 16) { + } else if (sbcs.sbaccess == 1 && config.max_bus_master_bits >= 16) { sbdata[0] = sim->debug_mmu->load_uint16(address); - } else if (sbcs.sbaccess == 2 && max_bus_master_bits >= 32) { + } else if (sbcs.sbaccess == 2 && config.max_bus_master_bits >= 32) { sbdata[0] = sim->debug_mmu->load_uint32(address); - } else if (sbcs.sbaccess == 3 && max_bus_master_bits >= 64) { + } else if (sbcs.sbaccess == 3 && config.max_bus_master_bits >= 64) { uint64_t value = sim->debug_mmu->load_uint64(address); sbdata[0] = value; sbdata[1] = value >> 32; @@ -295,13 +339,13 @@ void debug_module_t::sb_write() { reg_t address = ((uint64_t) sbaddress[1] << 32) | sbaddress[0]; D(fprintf(stderr, "sb_write() 0x%x @ 0x%lx\n", sbdata[0], address)); - if (sbcs.sbaccess == 0 && max_bus_master_bits >= 8) { + if (sbcs.sbaccess == 0 && config.max_bus_master_bits >= 8) { sim->debug_mmu->store_uint8(address, sbdata[0]); - } else if (sbcs.sbaccess == 1 && max_bus_master_bits >= 16) { + } else if (sbcs.sbaccess == 1 && config.max_bus_master_bits >= 16) { sim->debug_mmu->store_uint16(address, sbdata[0]); - } else if (sbcs.sbaccess == 2 && max_bus_master_bits >= 32) { + } else if (sbcs.sbaccess == 2 && config.max_bus_master_bits >= 32) { sim->debug_mmu->store_uint32(address, sbdata[0]); - } else if (sbcs.sbaccess == 3 && max_bus_master_bits >= 64) { + } else if (sbcs.sbaccess == 3 && config.max_bus_master_bits >= 64) { sim->debug_mmu->store_uint64(address, (((uint64_t) sbdata[1]) << 32) | sbdata[0]); } else { @@ -318,7 +362,7 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) result = read32(dmdata, i); if (abstractcs.busy) { result = 
-1; - fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i); + D(fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i)); } if (abstractcs.busy && abstractcs.cmderr == CMDERR_NONE) { @@ -328,12 +372,12 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) if (!abstractcs.busy && ((abstractauto.autoexecdata >> i) & 1)) { perform_abstract_command(); } - } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + progbufsize) { + } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + config.progbufsize) { unsigned i = address - DMI_PROGBUF0; result = read32(program_buffer, i); if (abstractcs.busy) { result = -1; - fprintf(stderr, "\ndmi_read(0x%02x (progbuf[%d]) -> -1 because abstractcs.busy==true\n", address, i); + D(fprintf(stderr, "\ndmi_read(0x%02x (progbuf[%d]) -> -1 because abstractcs.busy==true\n", address, i)); } if (!abstractcs.busy && ((abstractauto.autoexecprogbuf >> i) & 1)) { perform_abstract_command(); @@ -343,14 +387,11 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) switch (address) { case DMI_DMCONTROL: { - processor_t *proc = current_proc(); - if (proc) - dmcontrol.haltreq = proc->halt_request; - result = set_field(result, DMI_DMCONTROL_HALTREQ, dmcontrol.haltreq); result = set_field(result, DMI_DMCONTROL_RESUMEREQ, dmcontrol.resumereq); result = set_field(result, DMI_DMCONTROL_HARTSELHI, dmcontrol.hartsel >> DMI_DMCONTROL_HARTSELLO_LENGTH); + result = set_field(result, DMI_DMCONTROL_HASEL, dmcontrol.hasel); result = set_field(result, DMI_DMCONTROL_HARTSELLO, dmcontrol.hartsel); result = set_field(result, DMI_DMCONTROL_HARTRESET, dmcontrol.hartreset); result = set_field(result, DMI_DMCONTROL_NDMRESET, dmcontrol.ndmreset); @@ -359,42 +400,45 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) break; case DMI_DMSTATUS: { - processor_t *proc = current_proc(); - - dmstatus.allnonexistant = false; - 
dmstatus.allunavail = false; - dmstatus.allrunning = false; - dmstatus.allhalted = false; - dmstatus.allresumeack = false; - if (proc) { - if (halted[dmcontrol.hartsel]) { - dmstatus.allhalted = true; - } else { - dmstatus.allrunning = true; - } - } else { - dmstatus.allnonexistant = true; - } - dmstatus.anynonexistant = dmstatus.allnonexistant; - dmstatus.anyunavail = dmstatus.allunavail; - dmstatus.anyrunning = dmstatus.allrunning; - dmstatus.anyhalted = dmstatus.allhalted; - if (proc) { - if (resumeack[dmcontrol.hartsel]) { - dmstatus.allresumeack = true; - } else { - dmstatus.allresumeack = false; + dmstatus.allhalted = true; + dmstatus.anyhalted = false; + dmstatus.allrunning = true; + dmstatus.anyrunning = false; + dmstatus.allnonexistant = true; + dmstatus.allresumeack = true; + dmstatus.anyresumeack = false; + for (unsigned i = 0; i < nprocs; i++) { + if (hart_selected(i)) { + dmstatus.allnonexistant = false; + if (hart_state[i].resumeack) { + dmstatus.anyresumeack = true; + } else { + dmstatus.allresumeack = false; + } + if (hart_state[i].halted) { + dmstatus.allrunning = false; + dmstatus.anyhalted = true; + } else { + dmstatus.allhalted = false; + dmstatus.anyrunning = true; + } } - } else { - dmstatus.allresumeack = false; } + // We don't allow selecting non-existant harts through + // hart_array_mask, so the only way it's possible is by writing a + // non-existant hartsel. 
+ dmstatus.anynonexistant = (dmcontrol.hartsel >= nprocs); + + dmstatus.allunavail = false; + dmstatus.anyunavail = false; + result = set_field(result, DMI_DMSTATUS_IMPEBREAK, dmstatus.impebreak); result = set_field(result, DMI_DMSTATUS_ALLHAVERESET, - havereset[dmcontrol.hartsel]); + hart_state[dmcontrol.hartsel].havereset); result = set_field(result, DMI_DMSTATUS_ANYHAVERESET, - havereset[dmcontrol.hartsel]); + hart_state[dmcontrol.hartsel].havereset); result = set_field(result, DMI_DMSTATUS_ALLNONEXISTENT, dmstatus.allnonexistant); result = set_field(result, DMI_DMSTATUS_ALLUNAVAIL, dmstatus.allunavail); result = set_field(result, DMI_DMSTATUS_ALLRUNNING, dmstatus.allrunning); @@ -430,6 +474,20 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) result = set_field(result, DMI_HARTINFO_DATASIZE, abstractcs.datacount); result = set_field(result, DMI_HARTINFO_DATAADDR, debug_data_start); break; + case DMI_HAWINDOWSEL: + result = hawindowsel; + break; + case DMI_HAWINDOW: + { + unsigned base = hawindowsel * 32; + for (unsigned i = 0; i < 32; i++) { + unsigned n = base + i; + if (n < nprocs && hart_array_mask[n]) { + result |= 1 << i; + } + } + } + break; case DMI_SBCS: result = set_field(result, DMI_SBCS_SBVERSION, sbcs.version); result = set_field(result, DMI_SBCS_SBREADONADDR, sbcs.readonaddr); @@ -459,10 +517,12 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) case DMI_SBDATA0: result = sbdata[0]; if (sbcs.error == 0) { - sb_autoincrement(); if (sbcs.readondata) { sb_read(); } + if (sbcs.error == 0) { + sb_autoincrement(); + } } break; case DMI_SBDATA1: @@ -477,6 +537,10 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) case DMI_AUTHDATA: result = challenge; break; + case DMI_DMCS2: + result = set_field(result, DMI_DMCS2_HALTGROUP, + hart_state[dmcontrol.hartsel].haltgroup); + break; default: result = 0; D(fprintf(stderr, "Unexpected. 
Returning Error.")); @@ -488,6 +552,22 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) return true; } +void debug_module_t::run_test_idle() +{ + if (rti_remaining > 0) { + rti_remaining--; + } + if (rti_remaining == 0 && abstractcs.busy && abstract_command_completed) { + abstractcs.busy = false; + } +} + +static bool is_fpu_reg(unsigned regno) +{ + return (regno >= 0x1020 && regno <= 0x103f) || regno == CSR_FFLAGS || + regno == CSR_FRM || regno == CSR_FCSR; +} + bool debug_module_t::perform_abstract_command() { if (abstractcs.cmderr != CMDERR_NONE) @@ -499,11 +579,11 @@ bool debug_module_t::perform_abstract_command() if ((command >> 24) == 0) { // register access - unsigned size = get_field(command, AC_ACCESS_REGISTER_SIZE); + unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE); bool write = get_field(command, AC_ACCESS_REGISTER_WRITE); unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO); - if (!halted[dmcontrol.hartsel]) { + if (!hart_state[dmcontrol.hartsel].halted) { abstractcs.cmderr = CMDERR_HALTRESUME; return true; } @@ -511,10 +591,22 @@ bool debug_module_t::perform_abstract_command() unsigned i = 0; if (get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { - if (regno < 0x1000 && progbufsize < 2) { - // Make the debugger use the program buffer if it's available, so it - // can test both use cases. 
- write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH)); + if (is_fpu_reg(regno)) { + // Save S0 + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + // Save mstatus + write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS)); + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1)); + // Set mstatus.fs + assert((MSTATUS_FS & 0xfff) == 0); + write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12)); + write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS)); + } + + if (regno < 0x1000 && config.support_abstract_csr_access) { + if (!is_fpu_reg(regno)) { + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + } if (write) { switch (size) { @@ -544,7 +636,9 @@ bool debug_module_t::perform_abstract_command() return true; } } - write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH)); + if (!is_fpu_reg(regno)) { + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } } else if (regno >= 0x1000 && regno < 0x1020) { unsigned regnum = regno - 0x1000; @@ -568,9 +662,6 @@ bool debug_module_t::perform_abstract_command() } } else if (regno >= 0x1020 && regno < 0x1040) { - // Don't force the debugger to use progbuf if it exists, so the - // debugger has to make the decision not to use abstract commands to - // access 64-bit FPRs on 32-bit targets. 
unsigned fprnum = regno - 0x1020; if (write) { @@ -618,6 +709,14 @@ bool debug_module_t::perform_abstract_command() abstractcs.cmderr = CMDERR_NOTSUP; return true; } + + if (is_fpu_reg(regno)) { + // restore mstatus + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1)); + write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS)); + // restore s0 + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } } if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) { @@ -629,6 +728,8 @@ bool debug_module_t::perform_abstract_command() } debug_rom_flags[dmcontrol.hartsel] |= 1 << DEBUG_ROM_FLAG_GO; + rti_remaining = config.abstract_rti; + abstract_command_completed = false; abstractcs.busy = true; } else { @@ -659,7 +760,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) } return true; - } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + progbufsize) { + } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + config.progbufsize) { unsigned i = address - DMI_PROGBUF0; if (!abstractcs.busy) @@ -677,35 +778,47 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) if (!dmcontrol.dmactive && get_field(value, DMI_DMCONTROL_DMACTIVE)) reset(); dmcontrol.dmactive = get_field(value, DMI_DMCONTROL_DMACTIVE); - if (!dmstatus.authenticated) + if (!dmstatus.authenticated || !dmcontrol.dmactive) return true; - if (dmcontrol.dmactive) { - dmcontrol.haltreq = get_field(value, DMI_DMCONTROL_HALTREQ); - dmcontrol.resumereq = get_field(value, DMI_DMCONTROL_RESUMEREQ); - dmcontrol.hartreset = get_field(value, DMI_DMCONTROL_HARTRESET); - dmcontrol.ndmreset = get_field(value, DMI_DMCONTROL_NDMRESET); - dmcontrol.hartsel = get_field(value, DMI_DMCONTROL_HARTSELHI) << - DMI_DMCONTROL_HARTSELLO_LENGTH; - dmcontrol.hartsel |= get_field(value, DMI_DMCONTROL_HARTSELLO); - dmcontrol.hartsel &= (1L<halt_request = dmcontrol.haltreq; - if (dmcontrol.resumereq) { - debug_rom_flags[dmcontrol.hartsel] |= (1 << DEBUG_ROM_FLAG_RESUME); - 
resumeack[dmcontrol.hartsel] = false; + + dmcontrol.haltreq = get_field(value, DMI_DMCONTROL_HALTREQ); + dmcontrol.resumereq = get_field(value, DMI_DMCONTROL_RESUMEREQ); + dmcontrol.hartreset = get_field(value, DMI_DMCONTROL_HARTRESET); + dmcontrol.ndmreset = get_field(value, DMI_DMCONTROL_NDMRESET); + if (config.support_hasel) + dmcontrol.hasel = get_field(value, DMI_DMCONTROL_HASEL); + else + dmcontrol.hasel = 0; + dmcontrol.hartsel = get_field(value, DMI_DMCONTROL_HARTSELHI) << + DMI_DMCONTROL_HARTSELLO_LENGTH; + dmcontrol.hartsel |= get_field(value, DMI_DMCONTROL_HARTSELLO); + dmcontrol.hartsel &= (1L<halt_request = dmcontrol.haltreq ? proc->HR_REGULAR : proc->HR_NONE; + if (dmcontrol.haltreq) { + D(fprintf(stderr, "halt hart %d\n", i)); + } + if (dmcontrol.resumereq) { + D(fprintf(stderr, "resume hart %d\n", i)); + debug_rom_flags[i] |= (1 << DEBUG_ROM_FLAG_RESUME); + hart_state[i].resumeack = false; + } + if (dmcontrol.hartreset) { + proc->reset(); + } + } } - if (dmcontrol.hartreset) { - proc->reset(); - } } + if (dmcontrol.ndmreset) { for (size_t i = 0; i < sim->nprocs(); i++) { - proc = sim->get_core(i); + processor_t *proc = sim->get_core(i); proc->reset(); } } @@ -716,6 +829,22 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) command = value; return perform_abstract_command(); + case DMI_HAWINDOWSEL: + hawindowsel = value & ((1U<> i) & 1; + } + } + } + return true; + case DMI_ABSTRACTCS: abstractcs.cmderr = (cmderr_t) (((uint32_t) (abstractcs.cmderr)) & (~(uint32_t)(get_field(value, DMI_ABSTRACTCS_CMDERR)))); return true; @@ -737,6 +866,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) sbaddress[0] = value; if (sbcs.error == 0 && sbcs.readonaddr) { sb_read(); + sb_autoincrement(); } return true; case DMI_SBADDRESS1: @@ -752,7 +882,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) sbdata[0] = value; if (sbcs.error == 0) { sb_write(); - if (sbcs.autoincrement && sbcs.error == 0) { + if (sbcs.error 
== 0) { sb_autoincrement(); } } @@ -769,7 +899,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) case DMI_AUTHDATA: D(fprintf(stderr, "debug authentication: got 0x%x; 0x%x unlocks\n", value, challenge + secret)); - if (require_authentication) { + if (config.require_authentication) { if (value == challenge + secret) { dmstatus.authenticated = true; } else { @@ -778,6 +908,12 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) } } return true; + case DMI_DMCS2: + if (config.support_haltgroups && get_field(value, DMI_DMCS2_HGWRITE)) { + hart_state[dmcontrol.hartsel].haltgroup = get_field(value, + DMI_DMCS2_HALTGROUP); + } + return true; } } return false; @@ -785,6 +921,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) void debug_module_t::proc_reset(unsigned id) { - havereset[id] = true; - halted[id] = false; + hart_state[id].havereset = true; + hart_state[id].halted = false; + hart_state[id].haltgroup = 0; } diff --git a/riscv/debug_module.h b/riscv/debug_module.h index 5b43ed628c..2bcdba4228 100644 --- a/riscv/debug_module.h +++ b/riscv/debug_module.h @@ -8,9 +8,23 @@ class sim_t; +typedef struct { + // Size of program_buffer in 32-bit words, as exposed to the rest of the + // world. + unsigned progbufsize; + unsigned max_bus_master_bits; + bool require_authentication; + unsigned abstract_rti; + bool support_hasel; + bool support_abstract_csr_access; + bool support_haltgroups; + bool support_impebreak; +} debug_module_config_t; + typedef struct { bool haltreq; bool resumereq; + bool hasel; unsigned hartsel; bool hartreset; bool dmactive; @@ -73,6 +87,13 @@ typedef struct { bool access8; } sbcs_t; +typedef struct { + bool halted; + bool resumeack; + bool havereset; + uint8_t haltgroup; +} hart_debug_state_t; + class debug_module_t : public abstract_device_t { public: @@ -81,9 +102,11 @@ class debug_module_t : public abstract_device_t * follows: * 1. Read a 32-bit value from authdata: * 2. 
Write the value that was read back, plus one, to authdata. + * + * abstract_rti is extra run-test/idle cycles that each abstract command + * takes to execute. Useful for testing OpenOCD. */ - debug_module_t(sim_t *sim, unsigned progbufsize, unsigned max_bus_master_bits, - bool require_authentication); + debug_module_t(sim_t *sim, const debug_module_config_t &config); ~debug_module_t(); void add_device(bus_t *bus); @@ -97,23 +120,23 @@ class debug_module_t : public abstract_device_t bool dmi_read(unsigned address, uint32_t *value); bool dmi_write(unsigned address, uint32_t value); + // Called for every cycle the JTAG TAP spends in Run-Test/Idle. + void run_test_idle(); + // Called when one of the attached harts was reset. void proc_reset(unsigned id); private: static const unsigned datasize = 2; - // Size of program_buffer in 32-bit words, as exposed to the rest of the - // world. - unsigned progbufsize; + unsigned nprocs; + debug_module_config_t config; // Actual size of the program buffer, which is 1 word bigger than we let on // to implement the implicit ebreak at the end. unsigned program_buffer_bytes; - unsigned max_bus_master_bits; - bool require_authentication; static const unsigned debug_data_start = 0x380; unsigned debug_progbuf_start; - static const unsigned debug_abstract_size = 5; + static const unsigned debug_abstract_size = 12; unsigned debug_abstract_start; // R/W this through custom registers, to allow debuggers to test that // functionality. @@ -121,7 +144,7 @@ class debug_module_t : public abstract_device_t // We only support 1024 harts currently. More requires at least resizing // the arrays below, and their corresponding special memory regions. 
- static const unsigned hartsellen = 10; + unsigned hartsellen = 10; sim_t *sim; @@ -130,9 +153,7 @@ class debug_module_t : public abstract_device_t uint8_t *program_buffer; uint8_t dmdata[datasize * 4]; - bool halted[1024]; - bool resumeack[1024]; - bool havereset[1024]; + std::vector hart_state; uint8_t debug_rom_flags[1024]; void write32(uint8_t *rom, unsigned int index, uint32_t value); @@ -148,6 +169,8 @@ class debug_module_t : public abstract_device_t abstractcs_t abstractcs; abstractauto_t abstractauto; uint32_t command; + uint16_t hawindowsel; + std::vector hart_array_mask; sbcs_t sbcs; uint32_t sbaddress[4]; @@ -156,9 +179,13 @@ class debug_module_t : public abstract_device_t uint32_t challenge; const uint32_t secret = 1; - processor_t *current_proc() const; + processor_t *processor(unsigned hartid) const; + bool hart_selected(unsigned hartid) const; void reset(); bool perform_abstract_command(); + + bool abstract_command_completed; + unsigned rti_remaining; }; #endif diff --git a/riscv/decode.h b/riscv/decode.h index f9e3b6f649..d6d270af87 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -7,10 +7,7 @@ # error spike requires a two''s-complement c++ implementation #endif -#ifdef WORDS_BIGENDIAN -# error spike requires a little-endian host -#endif - +#include #include #include #include @@ -24,13 +21,25 @@ typedef int64_t sreg_t; typedef uint64_t reg_t; +#ifdef __SIZEOF_INT128__ +typedef __int128 int128_t; +typedef unsigned __int128 uint128_t; +#endif + const int NXPR = 32; const int NFPR = 32; +const int NVPR = 32; const int NCSR = 4096; #define X_RA 1 #define X_SP 2 +#define VCSR_VXRM_SHIFT 1 +#define VCSR_VXRM (0x3 << VCSR_VXRM_SHIFT) + +#define VCSR_VXSAT_SHIFT 0 +#define VCSR_VXSAT (0x1 << VCSR_VXSAT_SHIFT) + #define FP_RD_NE 0 #define FP_RD_0 1 #define FP_RD_DN 2 @@ -68,7 +77,7 @@ class insn_t public: insn_t() = default; insn_t(insn_bits_t bits) : b(bits) {} - insn_bits_t bits() { return b; } + insn_bits_t bits() { return b & ~((UINT64_MAX) << 
(length() * 8)); } int length() { return insn_length(b); } int64_t i_imm() { return int64_t(b) >> 20; } int64_t shamt() { return x(20, 6); } @@ -101,6 +110,32 @@ class insn_t uint64_t rvc_rs2() { return x(2, 5); } uint64_t rvc_rs1s() { return 8 + x(7, 3); } uint64_t rvc_rs2s() { return 8 + x(2, 3); } + + uint64_t v_vm() { return x(25, 1); } + uint64_t v_wd() { return x(26, 1); } + uint64_t v_nf() { return x(29, 3); } + uint64_t v_simm5() { return xs(15, 5); } + uint64_t v_zimm5() { return x(15, 5); } + uint64_t v_zimm11() { return x(20, 11); } + uint64_t v_lmul() { return x(20, 2); } + uint64_t v_frac_lmul() { return x(22, 1); } + uint64_t v_sew() { return 1 << (x(23, 3) + 3); } + uint64_t v_width() { return x(12, 3); } + uint64_t v_mop() { return x(26, 2); } + uint64_t v_lumop() { return x(20, 5); } + uint64_t v_sumop() { return x(20, 5); } + uint64_t v_vta() { return x(26, 1); } + uint64_t v_vma() { return x(27, 1); } + uint64_t v_mew() { return x(28, 1); } + + // Xpulpimg + uint64_t p_zimm5() { return x(20, 5); } + int64_t p_simm5() { return xs(20, 5); } + uint64_t p_rs3() { return x(7, 5); } + uint64_t p_zimm6() { return x(25,1) + (x(20, 5) << 1); } + int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } + + private: insn_bits_t b; uint64_t x(int lo, int len) { return (b >> lo) & ((insn_bits_t(1) << len)-1); } @@ -121,6 +156,14 @@ class regfile_t { return data[i]; } + regfile_t() + { + reset(); + } + void reset() + { + memset(data, 0, sizeof(data)); + } private: T data[N]; }; @@ -128,26 +171,38 @@ class regfile_t // helpful macros, etc #define MMU (*p->get_mmu()) #define STATE (*p->get_state()) +#define P (*p) +#define FLEN (p->get_flen()) #define READ_REG(reg) STATE.XPR[reg] #define READ_FREG(reg) STATE.FPR[reg] +#define RD READ_REG(insn.rd()) #define RS1 READ_REG(insn.rs1()) #define RS2 READ_REG(insn.rs2()) +#define RS3 READ_REG(insn.rs3()) #define WRITE_RD(value) WRITE_REG(insn.rd(), value) #ifndef RISCV_ENABLE_COMMITLOG # define WRITE_REG(reg, value) 
STATE.XPR.write(reg, value) # define WRITE_FREG(reg, value) DO_WRITE_FREG(reg, freg(value)) +# define WRITE_VSTATUS #else + /* 0 : int + * 1 : floating + * 2 : vector reg + * 3 : vector hint + * 4 : csr + */ # define WRITE_REG(reg, value) ({ \ reg_t wdata = (value); /* value may have side effects */ \ - STATE.log_reg_write = (commit_log_reg_t){(reg) << 1, {wdata, 0}}; \ + STATE.log_reg_write[(reg) << 4] = {wdata, 0}; \ STATE.XPR.write(reg, wdata); \ }) # define WRITE_FREG(reg, value) ({ \ freg_t wdata = freg(value); /* value may have side effects */ \ - STATE.log_reg_write = (commit_log_reg_t){((reg) << 1) | 1, wdata}; \ + STATE.log_reg_write[((reg) << 4) | 1] = wdata; \ DO_WRITE_FREG(reg, wdata); \ }) +# define WRITE_VSTATUS STATE.log_reg_write[3] = {0, 0}; #endif // RVC macros @@ -168,34 +223,82 @@ class regfile_t #define FRS3 READ_FREG(insn.rs3()) #define dirty_fp_state (STATE.mstatus |= MSTATUS_FS | (xlen == 64 ? MSTATUS64_SD : MSTATUS32_SD)) #define dirty_ext_state (STATE.mstatus |= MSTATUS_XS | (xlen == 64 ? MSTATUS64_SD : MSTATUS32_SD)) +#define dirty_vs_state (STATE.mstatus |= MSTATUS_VS | (xlen == 64 ? 
MSTATUS64_SD : MSTATUS32_SD)) #define DO_WRITE_FREG(reg, value) (STATE.FPR.write(reg, value), dirty_fp_state) #define WRITE_FRD(value) WRITE_FREG(insn.rd(), value) - + #define SHAMT (insn.i_imm() & 0x3F) #define BRANCH_TARGET (pc + insn.sb_imm()) #define JUMP_TARGET (pc + insn.uj_imm()) #define RM ({ int rm = insn.rm(); \ if(rm == 7) rm = STATE.frm; \ - if(rm > 4) throw trap_illegal_instruction(0); \ + if(rm > 4) throw trap_illegal_instruction(insn.bits()); \ rm; }) #define get_field(reg, mask) (((reg) & (decltype(reg))(mask)) / ((mask) & ~((mask) << 1))) #define set_field(reg, mask, val) (((reg) & ~(decltype(reg))(mask)) | (((decltype(reg))(val) * ((mask) & ~((mask) << 1))) & (decltype(reg))(mask))) -#define require(x) if (unlikely(!(x))) throw trap_illegal_instruction(0) +#define require(x) if (unlikely(!(x))) throw trap_illegal_instruction(insn.bits()) #define require_privilege(p) require(STATE.prv >= (p)) +#define require_novirt() if (unlikely(STATE.v)) throw trap_virtual_instruction(insn.bits()) #define require_rv64 require(xlen == 64) #define require_rv32 require(xlen == 32) #define require_extension(s) require(p->supports_extension(s)) #define require_fp require((STATE.mstatus & MSTATUS_FS) != 0) #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0) +#define require_vector_vs require((STATE.mstatus & MSTATUS_VS) != 0); +#define require_vector(alu) \ + do { \ + require_vector_vs; \ + require_extension('V'); \ + require(!P.VU.vill); \ + if (alu && !P.VU.vstart_alu) \ + require(P.VU.vstart == 0); \ + WRITE_VSTATUS; \ + dirty_vs_state; \ + } while (0); +#define require_vector_novtype(is_log, alu) \ + do { \ + require_vector_vs; \ + require_extension('V'); \ + if (alu && !P.VU.vstart_alu) \ + require(P.VU.vstart == 0); \ + if (is_log) \ + WRITE_VSTATUS; \ + dirty_vs_state; \ + } while (0); +#define require_align(val, pos) require(is_aligned(val, pos)) +#define require_noover(astart, asize, bstart, bsize) \ + require(!is_overlapped(astart, asize, 
bstart, bsize)) +#define require_noover_widen(astart, asize, bstart, bsize) \ + require(!is_overlapped_widen(astart, asize, bstart, bsize)) +#define require_vm do { if (insn.v_vm() == 0) require(insn.rd() != 0);} while(0); + #define set_fp_exceptions ({ if (softfloat_exceptionFlags) { \ dirty_fp_state; \ STATE.fflags |= softfloat_exceptionFlags; \ } \ softfloat_exceptionFlags = 0; }) +// Xpulpimg macros +#define sext16(x) ((sreg_t)(int16_t)(x)) +#define zext16(x) ((reg_t)(uint16_t)(x)) + +#define sext8(x) ((sreg_t)(int8_t)(x)) +#define zext8(x) ((reg_t)(uint8_t)(x)) + +#define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ +#define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) + +#define RS1_H(i) ((RS1 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs1 half: i should only be 0 or 1 */ +#define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ +#define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 1 */ +#define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ +#define RD_H(i) ((RD >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rd half: i should only be 0 or 1 */ +#define RD_B(i) ((RD >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rd byte: i should only be from 0 to 3 */ + + #define sext32(x) ((sreg_t)(int32_t)(x)) #define zext32(x) ((reg_t)(uint32_t)(x)) #define sext_xlen(x) (((sreg_t)(x) << (64-xlen)) >> (64-xlen)) @@ -212,9 +315,12 @@ class regfile_t STATE.pc = __npc; \ } while(0) +class wait_for_interrupt_t {}; + #define wfi() \ do { set_pc_and_serialize(npc); \ npc = PC_SERIALIZE_WFI; \ + throw wait_for_interrupt_t(); \ } while(0) #define serialize() set_pc_and_serialize(npc) @@ -226,21 +332,29 @@ class regfile_t #define invalid_pc(pc) ((pc) & 1) /* Convenience wrappers to simplify softfloat code sequences */ +#define isBoxedF16(r) (isBoxedF32(r) && 
((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48))) +#define unboxF16(r) (isBoxedF16(r) ? (uint16_t)r.v[0] : defaultNaNF16UI) #define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0)) #define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI) #define isBoxedF64(r) ((r.v[1] + 1) == 0) #define unboxF64(r) (isBoxedF64(r) ? r.v[0] : defaultNaNF64UI) typedef float128_t freg_t; +inline float16_t f16(uint16_t v) { return { v }; } inline float32_t f32(uint32_t v) { return { v }; } inline float64_t f64(uint64_t v) { return { v }; } +inline float16_t f16(freg_t r) { return f16(unboxF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } +inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; } inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; } inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; } inline freg_t freg(float128_t f) { return f; } +#define F16_SIGN ((uint16_t)1 << 15) #define F32_SIGN ((uint32_t)1 << 31) #define F64_SIGN ((uint64_t)1 << 63) +#define fsgnj16(a, b, n, x) \ + f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN)) #define fsgnj32(a, b, n, x) \ f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? F32_SIGN : 0) ^ f32(b).v) & F32_SIGN)) #define fsgnj64(a, b, n, x) \ @@ -268,14 +382,2020 @@ inline freg_t f128_negate(freg_t a) #define validate_csr(which, write) ({ \ if (!STATE.serialized) return PC_SERIALIZE_BEFORE; \ STATE.serialized = false; \ - unsigned csr_priv = get_field((which), 0x300); \ - unsigned csr_read_only = get_field((which), 0xC00) == 3; \ - if (((write) && csr_read_only) || STATE.prv < csr_priv) \ - throw trap_illegal_instruction(0); \ + /* permissions check occurs in get_csr */ \ (which); }) -// Seems that 0x0 doesn't work. 
-#define DEBUG_START 0x100 -#define DEBUG_END (0x1000 - 1) +/* For debug only. This will fail if the native machine's float types are not IEEE */ +inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;} +inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;} +inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;} + +// Vector macros +#define e8 8 // 8b elements +#define e16 16 // 16b elements +#define e32 32 // 32b elements +#define e64 64 // 64b elements +#define e128 128 // 128b elements +#define e256 256 // 256b elements +#define e512 512 // 512b elements +#define e1024 1024 // 1024b elements + +#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew)) +#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew)) + +#define DEBUG_RVV 0 + +#if DEBUG_RVV +#define DEBUG_RVV_FP_VV \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2)); +#define DEBUG_RVV_FP_VF \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2)); +#define DEBUG_RVV_FMA_VV \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2), to_f(vd_old)); +#define DEBUG_RVV_FMA_VF \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old)); +#else +#define DEBUG_RVV_FP_VV 0 +#define DEBUG_RVV_FP_VF 0 +#define DEBUG_RVV_FMA_VV 0 +#define DEBUG_RVV_FMA_VF 0 +#endif + +// +// vector: masking skip helper +// +#define VI_MASK_VARS \ + const int midx = i / 64; \ + const int mpos = i % 64; + +#define VI_LOOP_ELEMENT_SKIP(BODY) \ + VI_MASK_VARS \ + if (insn.v_vm() == 0) { \ + BODY; \ + bool skip = ((P.VU.elt(0, midx) >> mpos) & 0x1) == 0; \ + if (skip) {\ + continue; \ + }\ + } + +#define VI_ELEMENT_SKIP(inx) \ + if (inx >= vl) { \ + continue; \ + } else if (inx < P.VU.vstart) { \ + continue; \ + } else { \ + VI_LOOP_ELEMENT_SKIP(); \ + } + +// +// vector: operation and register acccess check helper +// 
+static inline bool is_overlapped(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} + +static inline bool is_overlapped_widen(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + if (astart < bstart && + is_overlapped(astart, asize, bstart, bsize) && + !is_overlapped(astart, asize, bstart + bsize, bsize)) { + return false; + } else { + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; + } +} + +static inline bool is_aligned(const unsigned val, const unsigned pos) +{ + return pos ? (val & (pos - 1)) == 0 : true; +} + +#define VI_NARROW_CHECK_COMMON \ + require_vector(true);\ + require(P.VU.vflmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require_align(insn.rs2(), P.VU.vflmul * 2); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vm; \ + +#define VI_WIDE_CHECK_COMMON \ + require_vector(true);\ + require(P.VU.vflmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require_align(insn.rd(), P.VU.vflmul * 2); \ + require_vm; \ + +#define VI_CHECK_ST_INDEX(elt_width) \ + require_vector(false); \ + float vemul = ((float)elt_width / P.VU.vsew * P.VU.vflmul); \ + require(vemul >= 0.125 && vemul <= 8); \ + reg_t emul = vemul < 1 ? 1 : vemul; \ + reg_t flmul = P.VU.vflmul < 1 ? 
1 : P.VU.vflmul; \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), vemul); \ + require((nf * flmul) <= (NVPR / 4) && \ + (insn.rd() + nf * flmul) <= NVPR); \ + +#define VI_CHECK_LD_INDEX(elt_width) \ + VI_CHECK_ST_INDEX(elt_width); \ + if (elt_width > P.VU.vsew) { \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else if (elt_width < P.VU.vsew) { \ + if (vemul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } \ + } \ + if (insn.v_nf() > 0) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + require_noover(vd, nf, insn.rs2(), 1); \ + } \ + require_vm; \ + +#define VI_CHECK_MSS(is_vs1) \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), 1, insn.rs2(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (is_vs1) {\ + if (insn.rd() != insn.rs1()) \ + require_noover(insn.rd(), 1, insn.rs1(), P.VU.vflmul); \ + require_align(insn.rs1(), P.VU.vflmul); \ + } \ + +#define VI_CHECK_SSS(is_vs1) \ + require_vm; \ + if (P.VU.vflmul > 1) { \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (is_vs1) { \ + require_align(insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_STORE(elt_width) \ + require_vector(false); \ + reg_t veew = sizeof(elt_width##_t) * 8; \ + float vemul = ((float)veew / P.VU.vsew * P.VU.vflmul); \ + reg_t emul = vemul < 1 ? 
1 : vemul; \ + require(vemul >= 0.125 && vemul <= 8); \ + require_align(insn.rd(), vemul); \ + require((nf * emul) <= (NVPR / 4) && \ + (insn.rd() + nf * emul) <= NVPR); \ + +#define VI_CHECK_LOAD(elt_width) \ + VI_CHECK_STORE(elt_width); \ + require_vm; \ + +#define VI_CHECK_DSS(is_vs1) \ + VI_WIDE_CHECK_COMMON; \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + } \ + if (is_vs1) {\ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_QSS(is_vs1) \ + require_vector(true);\ + p->supports_extension(EXT_ZVQMAC); \ + require(P.VU.vflmul <= 2); \ + require(P.VU.vsew * 4 <= P.VU.ELEN); \ + require_align(insn.rd(), P.VU.vflmul * 4); \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_vm; \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 4, insn.rs2(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 4, insn.rs2(), P.VU.vflmul); \ + } \ + if (is_vs1) {\ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 4, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 4, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_DDS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require_align(insn.rs2(), P.VU.vflmul * 2); \ + if (is_rs) { \ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_SDS(is_vs1) \ + VI_NARROW_CHECK_COMMON; \ + if (insn.rd() 
!= insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \ + if (is_vs1) \ + require_align(insn.rs1(), P.VU.vflmul); \ + +#define VI_CHECK_REDUCTION(is_wide) \ + require_vector(true);\ + if (is_wide) {\ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + } \ + require_align(insn.rs2(), P.VU.vflmul); \ + require(P.VU.vstart == 0); \ + +#define VI_CHECK_SLIDE(is_over) \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vm; \ + if (is_over) \ + require(insn.rd() != insn.rs2()); \ + + +// +// vector: loop header and end helper +// +#define VI_GENERAL_LOOP_BASE \ + require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i 0) { \ + vd_0_des = vd_0_res; \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_CMP_BASE \ + require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i(insn.rd(), midx, true); \ + uint64_t res = 0; + +#define VI_LOOP_CMP_END \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_MASK(op) \ + require(P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + int midx = i / 64; \ + int mpos = i % 64; \ + uint64_t mmask = UINT64_C(1) << mpos; \ + uint64_t vs2 = P.VU.elt(insn.rs2(), midx); \ + uint64_t vs1 = P.VU.elt(insn.rs1(), midx); \ + uint64_t &res = P.VU.elt(insn.rd(), midx, true); \ + res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_NSHIFT_BASE \ + VI_GENERAL_LOOP_BASE; \ + VI_LOOP_ELEMENT_SKIP({\ + require(!(insn.rd() == 0 && P.VU.vflmul > 1));\ + }); + + +#define 
INT_ROUNDING(result, xrm, gb) \ + do { \ + const uint64_t lsb = 1UL << (gb); \ + const uint64_t lsb_half = lsb >> 1; \ + switch (xrm) {\ + case VRM::RNU:\ + result += lsb_half; \ + break;\ + case VRM::RNE:\ + if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb))) \ + result += lsb; \ + break;\ + case VRM::RDN:\ + break;\ + case VRM::ROD:\ + if (result & (lsb - 1)) \ + result |= lsb; \ + break;\ + case VRM::INVALID_RM:\ + assert(true);\ + } \ + } while (0) + +// +// vector: integer and masking operand access helper +// +#define VXI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); + +#define VV_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type rs1 = (type_usew_t::type)RS1; \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); \ 
+ type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define XV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, RS1); + +#define VV_UCMP_PARAMS(x) \ + type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_UCMP_PARAMS(x) \ + type_usew_t::type rs1 = (type_usew_t::type)RS1; \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_UCMP_PARAMS(x) \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VV_CMP_PARAMS(x) \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_CMP_PARAMS(x) \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_CMP_PARAMS(x) \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_XI_SLIDEDOWN_PARAMS(x, off) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2 = P.VU.elt::type>(rs2_num, i + off); + +#define VI_XI_SLIDEUP_PARAMS(x, offset) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2 = P.VU.elt::type>(rs2_num, i - offset); + +#define VI_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto zimm5 = (type_usew_t::type)insn.v_zimm5(); + +#define VX_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; + +#define VV_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); + +#define XI_CARRY_PARAMS(x) \ + auto vs2 = 
P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + auto simm5 = (type_sew_t::type)insn.v_simm5(); \ + auto &vd = P.VU.elt(rd_num, midx, true); + +#define VV_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto &vd = P.VU.elt(rd_num, midx, true); + +#define XI_WITH_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + auto simm5 = (type_sew_t::type)insn.v_simm5(); \ + auto &vd = P.VU.elt::type>(rd_num, i, true); + +#define VV_WITH_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto &vd = P.VU.elt::type>(rd_num, i, true); + +// +// vector: integer and masking operation loop +// + +// comparision result to masking register +#define VI_VV_LOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_CMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_CMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_CMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VV_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + 
VV_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +// merge and copy loop +#define VI_VVXI_MERGE_LOOP(BODY) \ + VI_GENERAL_LOOP_BASE \ + if (sew == e8){ \ + VXI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VXI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VXI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VXI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// reduction loop - signed +#define VI_LOOP_REDUCTION_BASE(x) \ + require(x >= e8 && x <= e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); \ + +#define REDUCTION_LOOP(x, BODY) \ + VI_LOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_LOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8) { \ + REDUCTION_LOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_LOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_LOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_LOOP(e64, BODY) \ + } + 
+// reduction loop - unsgied +#define VI_ULOOP_REDUCTION_BASE(x) \ + require(x >= e8 && x <= e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define REDUCTION_ULOOP(x, BODY) \ + VI_ULOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_ULOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + REDUCTION_ULOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_ULOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_ULOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_ULOOP(e64, BODY) \ + } + + +// genearl VXI signed/unsgied loop +#define VI_VV_ULOOP(BODY) \ + VI_CHECK_SSS(true) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP(BODY) \ + VI_CHECK_SSS(true) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + 
VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// narrow operation loop +#define VI_VV_LOOP_NARROW(BODY) \ +VI_NARROW_CHECK_COMMON; \ +VI_LOOP_BASE \ +if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ +}else if(sew == e16){ \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ +}else if(sew == e32){ \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ +} \ +VI_LOOP_END + +#define VI_NARROW_SHIFT(sew1, sew2) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs2_u = P.VU.elt::type>(rs2_num, i); \ + type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; + +#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VI_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + 
VI_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VX_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VX_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VX_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VV_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VV_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VV_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +// widen operation loop +#define VI_VV_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##16_t)(sign##8_t)var0 op0 (sign##16_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##32_t)(sign##16_t)var0 op0 (sign##32_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##64_t)(sign##32_t)var0 op0 (sign##64_t)(sign##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##16_t vd_w = 
P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##16_t)(sign_1##8_t)var0 op0 (sign_2##16_t)(sign_2##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign_d##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##32_t)(sign_1##16_t)var0 op0 (sign_2##32_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##64_t)(sign_1##32_t)var0 op0 (sign_2##64_t)(sign_2##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_WVX_OP(var0, op0, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##16_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##16_t)(sign##8_t)var0; \ + } \ + break; \ + case e16: { \ + sign##32_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##32_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##32_t)(sign##16_t)var0; \ + } \ + break; \ + default: { \ + sign##64_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##64_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##64_t)(sign##32_t)var0; \ + } \ + break; \ + } + +// quad operation loop +#define VI_VV_LOOP_QUAD(BODY) \ + VI_CHECK_QSS(true); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_QUAD(BODY) \ + VI_CHECK_QSS(false); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_QUAD_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##32_t)(sign##8_t)var0 op0 (sign##32_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##64_t)(sign##16_t)var0 op0 
(sign##64_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_QUAD_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##32_t)(sign_1##8_t)var0 op0 (sign_2##32_t)(sign_2##8_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##64_t)(sign_1##16_t)var0 op0 (sign_2##64_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + } + +// wide reduction loop - signed +#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define WIDE_REDUCTION_LOOP(sew1, sew2, BODY) \ + VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(true); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_LOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_LOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_LOOP(e32, e64, BODY) \ + } + +// wide reduction loop - unsigned +#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define WIDE_REDUCTION_ULOOP(sew1, sew2, BODY) \ + VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(true); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + 
WIDE_REDUCTION_ULOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_ULOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_ULOOP(e32, e64, BODY) \ + } + +// carry/borrow bit loop +#define VI_VV_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(true); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + VV_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_XI_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(false); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + XI_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + XI_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_WITH_CARRY(BODY) \ + require(insn.rd() != 0); \ + VI_CHECK_SSS(true); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + VV_WITH_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_WITH_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_WITH_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_WITH_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_XI_LOOP_WITH_CARRY(BODY) \ + require(insn.rd() != 0); \ + VI_CHECK_SSS(false); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + XI_WITH_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_WITH_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_WITH_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + XI_WITH_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +// average loop +#define VI_VVX_LOOP_AVG(opd, op, is_vs1) \ +VI_CHECK_SSS(is_vs1); \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + switch(sew) { \ + case e8: { \ + VV_PARAMS(e8); \ + type_sew_t::type 
rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_PARAMS(e16); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_PARAMS(e32); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_PARAMS(e64); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END + +#define VI_VVX_ULOOP_AVG(opd, op, is_vs1) \ +VI_CHECK_SSS(is_vs1); \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + switch(sew) { \ + case e8: { \ + VV_U_PARAMS(e8); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint16_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_U_PARAMS(e16); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_U_PARAMS(e32); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_U_PARAMS(e64); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END + +// +// vector: load/store helper +// +#define VI_STRIP(inx) \ + reg_t vreg_inx = inx; + +#define VI_DUPLICATE_VREG(reg_num, idx_sew) \ +reg_t index[P.VU.vlmax]; \ +for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ + switch(idx_sew) { \ + case e8: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e16: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e32: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e64: \ + index[i] = P.VU.elt(reg_num, i); \ + 
break; \ + } \ +} + +#define VI_LD(stride, offset, elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + VI_CHECK_LOAD(elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + elt_width##_t val = MMU.load_##elt_width( \ + baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \ + P.VU.elt(vd + fn * emul, vreg_inx, true) = val; \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LD_INDEX(elt_width, is_seg) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + if (!is_seg) \ + require(nf == 1); \ + VI_CHECK_LD_INDEX(elt_width); \ + VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + switch(P.VU.vsew){ \ + case e8: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint8(baseAddr + index[i] + fn * 1); \ + break; \ + case e16: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint16(baseAddr + index[i] + fn * 2); \ + break; \ + case e32: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint32(baseAddr + index[i] + fn * 4); \ + break; \ + default: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint64(baseAddr + index[i] + fn * 8); \ + break; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_ST(stride, offset, elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + VI_CHECK_STORE(elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + elt_width##_t val = P.VU.elt(vs3 + fn * emul, vreg_inx); \ + MMU.store_##elt_width( \ + baseAddr + (stride) 
+ (offset) * sizeof(elt_width##_t), val); \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_ST_INDEX(elt_width, is_seg) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + if (!is_seg) \ + require(nf == 1); \ + VI_CHECK_ST_INDEX(elt_width); \ + VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + switch (P.VU.vsew) { \ + case e8: \ + MMU.store_uint8(baseAddr + index[i] + fn * 1, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + case e16: \ + MMU.store_uint16(baseAddr + index[i] + fn * 2, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + case e32: \ + MMU.store_uint32(baseAddr + index[i] + fn * 4, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + default: \ + MMU.store_uint64(baseAddr + index[i] + fn * 8, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LDST_FF(elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t sew = p->VU.vsew; \ + const reg_t vl = p->VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t rd_num = insn.rd(); \ + VI_CHECK_LOAD(elt_width); \ + bool early_stop = false; \ + for (reg_t i = p->VU.vstart; i < vl; ++i) { \ + VI_STRIP(i); \ + VI_ELEMENT_SKIP(i); \ + \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + uint64_t val; \ + try { \ + val = MMU.load_##elt_width( \ + baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \ + } catch (trap_t& t) { \ + if (i == 0) \ + throw; /* Only take exception on zeroth element */ \ + /* Reduce VL if an exception occurs on a later element */ \ + early_stop = true; \ + P.VU.vl = i; \ + break; \ + } \ + p->VU.elt(rd_num + fn * emul, vreg_inx, true) = val; \ + } \ + \ + if (early_stop) { \ + break; \ + } \ + } \ + p->VU.vstart = 0; + +#define VI_LD_WHOLE(elt_width) \ + require_vector_novtype(true, false); \ + const reg_t 
baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + const reg_t len = insn.v_nf() + 1; \ + require_align(vd, len); \ + const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \ + const reg_t size = len * elt_per_reg; \ + if (P.VU.vstart < size) { \ + reg_t i = P.VU.vstart / elt_per_reg; \ + reg_t off = P.VU.vstart % elt_per_reg; \ + if (off) { \ + for (reg_t pos = off; pos < elt_per_reg; ++pos) { \ + auto val = MMU.load_## elt_width(baseAddr + \ + P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.elt(vd + i, pos, true) = val; \ + P.VU.vstart++; \ + } \ + ++i; \ + } \ + for (; i < len; ++i) { \ + for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \ + auto val = MMU.load_## elt_width(baseAddr + \ + P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.elt(vd + i, pos, true) = val; \ + P.VU.vstart++; \ + } \ + } \ + } \ + P.VU.vstart = 0; \ + +#define VI_ST_WHOLE \ + require_vector_novtype(true, false); \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + const reg_t len = insn.v_nf() + 1; \ + require_align(vs3, len); \ + const reg_t size = len * P.VU.vlenb; \ + \ + if (P.VU.vstart < size) { \ + reg_t i = P.VU.vstart / P.VU.vlenb; \ + reg_t off = P.VU.vstart % P.VU.vlenb; \ + if (off) { \ + for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \ + auto val = P.VU.elt(vs3 + i, pos); \ + MMU.store_uint8(baseAddr + P.VU.vstart, val); \ + P.VU.vstart++; \ + } \ + i++; \ + } \ + for (; i < len; ++i) { \ + for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \ + auto val = P.VU.elt(vs3 + i, pos); \ + MMU.store_uint8(baseAddr + P.VU.vstart, val); \ + P.VU.vstart++; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +// +// vector: amo +// +#define VI_AMO(op, type, idx_type) \ + require_vector(false); \ + require_align(insn.rd(), P.VU.vflmul); \ + require(P.VU.vsew <= P.get_xlen() && P.VU.vsew >= 32); \ + require_align(insn.rd(), P.VU.vflmul); \ + float vemul = ((float)idx_type / P.VU.vsew * P.VU.vflmul); \ + require(vemul >= 0.125 && vemul <= 8); \ + require_align(insn.rs2(), 
vemul); \ + if (insn.v_wd()) {\ + require_vm; \ + if (idx_type > P.VU.vsew) { \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else if (idx_type < P.VU.vsew) { \ + if (vemul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } \ + } \ + } \ + VI_DUPLICATE_VREG(insn.rs2(), idx_type); \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + switch (P.VU.vsew) { \ + case e32: {\ + auto vs3 = P.VU.elt< type ## 32_t>(vd, vreg_inx); \ + auto val = MMU.amo_uint32(baseAddr + index[i], [&]( type ## 32_t lhs) { op }); \ + if (insn.v_wd()) \ + P.VU.elt< type ## 32_t>(vd, vreg_inx, true) = val; \ + } \ + break; \ + case e64: {\ + auto vs3 = P.VU.elt< type ## 64_t>(vd, vreg_inx); \ + auto val = MMU.amo_uint64(baseAddr + index[i], [&]( type ## 64_t lhs) { op }); \ + if (insn.v_wd()) \ + P.VU.elt< type ## 64_t>(vd, vreg_inx, true) = val; \ + } \ + break; \ + default: \ + require(0); \ + break; \ + } \ + } \ + P.VU.vstart = 0; + +// vector: sign/unsiged extension +#define VI_VV_EXT(div, type) \ + require(insn.rd() != insn.rs2()); \ + require_vm; \ + reg_t from = P.VU.vsew / div; \ + require(from >= e8 && from <= e64); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul / div); \ + if ((P.VU.vflmul / div) < 1) { \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ + } \ + reg_t pat = (((P.VU.vsew >> 3) << 4) | from >> 3); \ + VI_GENERAL_LOOP_BASE \ + VI_LOOP_ELEMENT_SKIP(); \ + switch (pat) { \ + case 0x21: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x41: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + 
case 0x81: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x42: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x82: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x84: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x88: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + default: \ + break; \ + } \ + VI_LOOP_END + +// +// vector: vfp helper +// +#define VI_VFP_COMMON \ + require_fp; \ + require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e32 && p->supports_extension('F')) || \ + (P.VU.vsew == e64 && p->supports_extension('D'))); \ + require_vector(true);\ + require(STATE.frm < 0x5);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm; + +#define VI_VFP_LOOP_BASE \ + VI_VFP_COMMON \ + for (reg_t i=P.VU.vstart; i(rd_num, midx, true); \ + uint64_t res = 0; + +#define VI_VFP_LOOP_REDUCTION_BASE(width) \ + float##width##_t vd_0 = P.VU.elt(rd_num, 0); \ + float##width##_t vs1_0 = P.VU.elt(rs1_num, 0); \ + vd_0 = vs1_0; \ + bool is_active = false; \ + for (reg_t i=P.VU.vstart; i(rs2_num, i); \ + is_active = true; \ + +#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \ + VI_VFP_COMMON \ + float64_t vd_0 = f64(P.VU.elt(rs1_num, 0).v); \ + for (reg_t i=P.VU.vstart; i 0) { \ + if (is_propagate && !is_active) { \ + switch (x) { \ + case e16: {\ + auto ret = f16_classify(f16(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + P.VU.elt(rd_num, 0, true) = defaultNaNF16UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + case e32: { \ + auto ret = f32_classify(f32(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + 
P.VU.elt(rd_num, 0, true) = defaultNaNF32UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + case e64: {\ + auto ret = f64_classify(f64(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + P.VU.elt(rd_num, 0, true) = defaultNaNF64UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + } \ + } else { \ + P.VU.elt::type>(rd_num, 0, true) = vd_0.v; \ + } \ + } + +#define VI_VFP_LOOP_CMP_END \ + switch(P.VU.vsew) { \ + case e16: \ + case e32: \ + case e64: { \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + } \ + P.VU.vstart = 0; + +#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs1 = P.VU.elt(rs1_num, i); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs1 = P.VU.elt(rs1_num, i); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_V_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); 
\ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + set_fp_exceptions; \ + VI_VFP_LOOP_END + +#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ + VI_CHECK_REDUCTION(false) \ + VI_VFP_COMMON \ + switch(P.VU.vsew) { \ + case e16: {\ + VI_VFP_LOOP_REDUCTION_BASE(16) \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e16) \ + break; \ + }\ + case e32: {\ + VI_VFP_LOOP_REDUCTION_BASE(32) \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e64: {\ + VI_VFP_LOOP_REDUCTION_BASE(64) \ + BODY64; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + +#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \ + VI_CHECK_REDUCTION(true) \ + VI_VFP_COMMON \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + bool is_active = false; \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e32: {\ + float64_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + +#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + 
float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t rs1 = f64(READ_FREG(rs1_num)); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VF; \ + VI_VFP_LOOP_END + +#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ + VI_CHECK_MSS(is_vs1); \ + VI_VFP_LOOP_CMP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = P.VU.elt(rs1_num, i); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t vs1 = P.VU.elt(rs1_num, i); \ + float64_t rs1 = f64(READ_FREG(rs1_num)); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + VI_VFP_LOOP_CMP_END \ + +#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + } \ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + + +#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DSS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, 
i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ + float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DDS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DDS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_LOOP_SCALE_BASE \ + require_fp; \ + require_vector(true);\ + require((P.VU.vsew == e8 && p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + require(STATE.frm < 0x5);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + 
softfloat_roundingMode = STATE.frm; \ + for (reg_t i=P.VU.vstart; i bus_t::find_device(reg_t addr) it--; return std::make_pair(it->first, it->second); } + +// Type for holding all registered MMIO plugins by name. +using mmio_plugin_map_t = std::map; + +// Simple singleton instance of an mmio_plugin_map_t. +static mmio_plugin_map_t& mmio_plugin_map() +{ + static mmio_plugin_map_t instance; + return instance; +} + +void register_mmio_plugin(const char* name_cstr, + const mmio_plugin_t* mmio_plugin) +{ + std::string name(name_cstr); + if (!mmio_plugin_map().emplace(name, *mmio_plugin).second) { + throw std::runtime_error("Plugin \"" + name + "\" already registered!"); + } +} + +mmio_plugin_device_t::mmio_plugin_device_t(const std::string& name, + const std::string& args) + : plugin(mmio_plugin_map().at(name)), user_data((*plugin.alloc)(args.c_str())) +{ +} + +mmio_plugin_device_t::~mmio_plugin_device_t() +{ + (*plugin.dealloc)(user_data); +} + +bool mmio_plugin_device_t::load(reg_t addr, size_t len, uint8_t* bytes) +{ + return (*plugin.load)(user_data, addr, len, bytes); +} + +bool mmio_plugin_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) +{ + return (*plugin.store)(user_data, addr, len, bytes); +} diff --git a/riscv/devices.h b/riscv/devices.h index 4e4d27ff60..3dd6c66936 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -2,10 +2,12 @@ #define _RISCV_DEVICES_H #include "decode.h" +#include "mmio_plugin.h" #include #include #include #include +#include class processor_t; @@ -62,7 +64,7 @@ class mem_t : public abstract_device_t { class clint_t : public abstract_device_t { public: - clint_t(std::vector&); + clint_t(std::vector&, uint64_t freq_hz, bool real_time); bool load(reg_t addr, size_t len, uint8_t* bytes); bool store(reg_t addr, size_t len, const uint8_t* bytes); size_t size() { return CLINT_SIZE; } @@ -72,8 +74,25 @@ class clint_t : public abstract_device_t { typedef uint64_t mtimecmp_t; typedef uint32_t msip_t; std::vector& procs; + uint64_t 
freq_hz; + bool real_time; + uint64_t real_time_ref_secs; + uint64_t real_time_ref_usecs; mtime_t mtime; std::vector mtimecmp; }; +class mmio_plugin_device_t : public abstract_device_t { + public: + mmio_plugin_device_t(const std::string& name, const std::string& args); + virtual ~mmio_plugin_device_t() override; + + virtual bool load(reg_t addr, size_t len, uint8_t* bytes) override; + virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + + private: + mmio_plugin_t plugin; + void* user_data; +}; + #endif diff --git a/riscv/disasm.h b/riscv/disasm.h index 94e007a12d..88d0e9b3ad 100644 --- a/riscv/disasm.h +++ b/riscv/disasm.h @@ -10,6 +10,7 @@ extern const char* xpr_name[NXPR]; extern const char* fpr_name[NFPR]; +extern const char* vr_name[NVPR]; extern const char* csr_name(int which); class arg_t @@ -19,18 +20,31 @@ class arg_t virtual ~arg_t() {} }; +// Indicates that the next arg (only) is optional. +// If the result of converting the next arg to a string is "" +// then it will not be printed. 
+struct : public arg_t { + std::string to_string(insn_t insn) const { return ""; } +} opt; + class disasm_insn_t { public: - disasm_insn_t(const char* name, uint32_t match, uint32_t mask, - const std::vector& args) - : match(match), mask(mask), args(args), name(name) {} + NOINLINE disasm_insn_t(const char* name, uint32_t match, uint32_t mask, + const std::vector& args) + : match(match), mask(mask), args(args), name(strdup(name)) {} + ~disasm_insn_t() { free(const_cast(name)); } bool operator == (insn_t insn) const { return (insn.bits() & mask) == match; } + const char* get_name() const + { + return name; + } + std::string to_string(insn_t insn) const { std::stringstream s; @@ -40,10 +54,21 @@ class disasm_insn_t if (args.size()) { + bool next_arg_optional = false; s << std::string(std::max(1, 8 - len), ' '); - for (size_t i = 0; i < args.size()-1; i++) - s << args[i]->to_string(insn) << ", "; - s << args[args.size()-1]->to_string(insn); + for (size_t i = 0; i < args.size(); i++) { + if (args[i] == &opt) { + next_arg_optional = true; + continue; + } + std::string argString = args[i]->to_string(insn); + if (next_arg_optional) { + next_arg_optional = false; + if (argString.empty()) continue; + } + if (i != 0) s << ", "; + s << argString; + } } return s.str(); } @@ -63,12 +88,15 @@ class disassembler_t public: disassembler_t(int xlen); ~disassembler_t(); + std::string disassemble(insn_t insn) const; + const disasm_insn_t* lookup(insn_t insn) const; + void add_insn(disasm_insn_t* insn); + private: static const int HASH_SIZE = 256; std::vector chain[HASH_SIZE+1]; - const disasm_insn_t* lookup(insn_t insn) const; }; #endif diff --git a/riscv/dts.cc b/riscv/dts.cc index b8a5f9d7c1..56b76e6c50 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -1,6 +1,7 @@ // See LICENSE for license details. 
#include "dts.h" +#include "libfdt.h" #include #include #include @@ -9,6 +10,8 @@ #include std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, + reg_t initrd_start, reg_t initrd_end, + const char* bootargs, std::vector procs, std::vector> mems) { @@ -21,6 +24,25 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " #size-cells = <2>;\n" " compatible = \"ucbbar,spike-bare-dev\";\n" " model = \"ucbbar,spike-bare\";\n" + " chosen {\n"; + if (initrd_start < initrd_end) { + s << " linux,initrd-start = <" << (size_t)initrd_start << ">;\n" + " linux,initrd-end = <" << (size_t)initrd_end << ">;\n"; + if (!bootargs) + bootargs = "root=/dev/ram console=hvc0 earlycon=sbi"; + } else { + if (!bootargs) + bootargs = "console=hvc0 earlycon=sbi"; + } + s << " bootargs = \""; + for (size_t i = 0; i < strlen(bootargs); i++) { + if (bootargs[i] == '"') + s << '\\' << bootargs[i]; + else + s << bootargs[i]; + } + s << "\";\n"; + s << " };\n" " cpus {\n" " #address-cells = <1>;\n" " #size-cells = <0>;\n" @@ -33,6 +55,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " compatible = \"riscv\";\n" " riscv,isa = \"" << procs[i]->get_isa_string() << "\";\n" " mmu-type = \"riscv," << (procs[i]->get_max_xlen() <= 32 ? 
"sv32" : "sv48") << "\";\n" + " riscv,pmpregions = <16>;\n" + " riscv,pmpgranularity = <4>;\n" " clock-frequency = <" << cpu_hz << ">;\n" " CPU" << i << "_intc: interrupt-controller {\n" " #interrupt-cells = <1>;\n" @@ -47,7 +71,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " memory@" << m.first << " {\n" " device_type = \"memory\";\n" " reg = <0x" << (m.first >> 32) << " 0x" << (m.first & (uint32_t)-1) << - " 0x" << (m.second->size() >> 32) << " 0x" << (m.second->size() & (uint32_t)-1) << ">;\n" + " 0x" << (m.second->size() >> 16 >> 16) << " 0x" << (m.second->size() & (uint32_t)-1) << ">;\n" " };\n"; } s << " soc {\n" @@ -80,6 +104,7 @@ std::string dts_compile(const std::string& dts) int dts_pipe[2]; pid_t dts_pid; + fflush(NULL); // flush stdout/stderr before forking if (pipe(dts_pipe) != 0 || (dts_pid = fork()) < 0) { std::cerr << "Failed to fork dts child: " << strerror(errno) << std::endl; exit(1); @@ -116,7 +141,7 @@ std::string dts_compile(const std::string& dts) close(dts_pipe[1]); close(dtb_pipe[0]); close(dtb_pipe[1]); - execl(DTC, DTC, "-O", "dtb", 0); + execlp(DTC, DTC, "-O", "dtb", 0); std::cerr << "Failed to run " DTC ": " << strerror(errno) << std::endl; exit(1); } @@ -154,3 +179,97 @@ std::string dts_compile(const std::string& dts) return dtb.str(); } + + +static int fdt_get_node_addr_size(void *fdt, int node, reg_t *addr, + unsigned long *size, const char *field) +{ + int parent, len, i; + int cell_addr, cell_size; + const fdt32_t *prop_addr, *prop_size; + uint64_t temp = 0; + + parent = fdt_parent_offset(fdt, node); + if (parent < 0) + return parent; + + cell_addr = fdt_address_cells(fdt, parent); + if (cell_addr < 1) + return -ENODEV; + + cell_size = fdt_size_cells(fdt, parent); + if (cell_size < 0) + return -ENODEV; + + if (!field) + return -ENODEV; + + prop_addr = (fdt32_t *)fdt_getprop(fdt, node, field, &len); + if (!prop_addr) + return -ENODEV; + prop_size = prop_addr + cell_addr; + + if (addr) { + for (i = 0; i < 
cell_addr; i++) + temp = (temp << 32) | fdt32_to_cpu(*prop_addr++); + *addr = temp; + } + temp = 0; + + if (size) { + for (i = 0; i < cell_size; i++) + temp = (temp << 32) | fdt32_to_cpu(*prop_size++); + *size = temp; + } + + return 0; +} + +int fdt_parse_clint(void *fdt, reg_t *clint_addr, + const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, clint_addr, NULL, "reg"); + if (rc < 0 || !clint_addr) + return -ENODEV; + + return 0; +} + +int fdt_parse_pmp_num(void *fdt, reg_t *pmp_num, const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, pmp_num, NULL, + "riscv,pmpregions"); + if (rc < 0 || !pmp_num) + return -ENODEV; + + return 0; +} + +int fdt_parse_pmp_alignment(void *fdt, reg_t *pmp_align, + const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, pmp_align, NULL, + "riscv,pmpgranularity"); + if (rc < 0 || !pmp_align) + return -ENODEV; + + return 0; +} diff --git a/riscv/dts.h b/riscv/dts.h index ec0aa6161b..1f01e0f8c8 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -7,9 +7,17 @@ #include std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, + reg_t initrd_start, reg_t initrd_end, + const char* bootargs, std::vector procs, std::vector> mems); std::string dts_compile(const std::string& dts); +int fdt_parse_clint(void *fdt, reg_t *clint_addr, + const char *compatible); +int fdt_parse_pmp_num(void *fdt, reg_t *pmp_num, + const char *compatible); +int fdt_parse_pmp_alignment(void *fdt, reg_t *pmp_align, + const char *compatible); #endif diff --git a/riscv/encoding.h b/riscv/encoding.h deleted file mode 
100644 index c109ce189d..0000000000 --- a/riscv/encoding.h +++ /dev/null @@ -1,1471 +0,0 @@ -// See LICENSE for license details. - -#ifndef RISCV_CSR_ENCODING_H -#define RISCV_CSR_ENCODING_H - -#define MSTATUS_UIE 0x00000001 -#define MSTATUS_SIE 0x00000002 -#define MSTATUS_HIE 0x00000004 -#define MSTATUS_MIE 0x00000008 -#define MSTATUS_UPIE 0x00000010 -#define MSTATUS_SPIE 0x00000020 -#define MSTATUS_HPIE 0x00000040 -#define MSTATUS_MPIE 0x00000080 -#define MSTATUS_SPP 0x00000100 -#define MSTATUS_HPP 0x00000600 -#define MSTATUS_MPP 0x00001800 -#define MSTATUS_FS 0x00006000 -#define MSTATUS_XS 0x00018000 -#define MSTATUS_MPRV 0x00020000 -#define MSTATUS_SUM 0x00040000 -#define MSTATUS_MXR 0x00080000 -#define MSTATUS_TVM 0x00100000 -#define MSTATUS_TW 0x00200000 -#define MSTATUS_TSR 0x00400000 -#define MSTATUS32_SD 0x80000000 -#define MSTATUS_UXL 0x0000000300000000 -#define MSTATUS_SXL 0x0000000C00000000 -#define MSTATUS64_SD 0x8000000000000000 - -#define SSTATUS_UIE 0x00000001 -#define SSTATUS_SIE 0x00000002 -#define SSTATUS_UPIE 0x00000010 -#define SSTATUS_SPIE 0x00000020 -#define SSTATUS_SPP 0x00000100 -#define SSTATUS_FS 0x00006000 -#define SSTATUS_XS 0x00018000 -#define SSTATUS_SUM 0x00040000 -#define SSTATUS_MXR 0x00080000 -#define SSTATUS32_SD 0x80000000 -#define SSTATUS_UXL 0x0000000300000000 -#define SSTATUS64_SD 0x8000000000000000 - -#define DCSR_XDEBUGVER (3U<<30) -#define DCSR_NDRESET (1<<29) -#define DCSR_FULLRESET (1<<28) -#define DCSR_EBREAKM (1<<15) -#define DCSR_EBREAKH (1<<14) -#define DCSR_EBREAKS (1<<13) -#define DCSR_EBREAKU (1<<12) -#define DCSR_STOPCYCLE (1<<10) -#define DCSR_STOPTIME (1<<9) -#define DCSR_CAUSE (7<<6) -#define DCSR_DEBUGINT (1<<5) -#define DCSR_HALT (1<<3) -#define DCSR_STEP (1<<2) -#define DCSR_PRV (3<<0) - -#define DCSR_CAUSE_NONE 0 -#define DCSR_CAUSE_SWBP 1 -#define DCSR_CAUSE_HWBP 2 -#define DCSR_CAUSE_DEBUGINT 3 -#define DCSR_CAUSE_STEP 4 -#define DCSR_CAUSE_HALT 5 - -#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) 
-#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) -#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) - -#define MCONTROL_SELECT (1<<19) -#define MCONTROL_TIMING (1<<18) -#define MCONTROL_ACTION (0x3f<<12) -#define MCONTROL_CHAIN (1<<11) -#define MCONTROL_MATCH (0xf<<7) -#define MCONTROL_M (1<<6) -#define MCONTROL_H (1<<5) -#define MCONTROL_S (1<<4) -#define MCONTROL_U (1<<3) -#define MCONTROL_EXECUTE (1<<2) -#define MCONTROL_STORE (1<<1) -#define MCONTROL_LOAD (1<<0) - -#define MCONTROL_TYPE_NONE 0 -#define MCONTROL_TYPE_MATCH 2 - -#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 -#define MCONTROL_ACTION_DEBUG_MODE 1 -#define MCONTROL_ACTION_TRACE_START 2 -#define MCONTROL_ACTION_TRACE_STOP 3 -#define MCONTROL_ACTION_TRACE_EMIT 4 - -#define MCONTROL_MATCH_EQUAL 0 -#define MCONTROL_MATCH_NAPOT 1 -#define MCONTROL_MATCH_GE 2 -#define MCONTROL_MATCH_LT 3 -#define MCONTROL_MATCH_MASK_LOW 4 -#define MCONTROL_MATCH_MASK_HIGH 5 - -#define MIP_SSIP (1 << IRQ_S_SOFT) -#define MIP_HSIP (1 << IRQ_H_SOFT) -#define MIP_MSIP (1 << IRQ_M_SOFT) -#define MIP_STIP (1 << IRQ_S_TIMER) -#define MIP_HTIP (1 << IRQ_H_TIMER) -#define MIP_MTIP (1 << IRQ_M_TIMER) -#define MIP_SEIP (1 << IRQ_S_EXT) -#define MIP_HEIP (1 << IRQ_H_EXT) -#define MIP_MEIP (1 << IRQ_M_EXT) - -#define SIP_SSIP MIP_SSIP -#define SIP_STIP MIP_STIP - -#define PRV_U 0 -#define PRV_S 1 -#define PRV_H 2 -#define PRV_M 3 - -#define SATP32_MODE 0x80000000 -#define SATP32_ASID 0x7FC00000 -#define SATP32_PPN 0x003FFFFF -#define SATP64_MODE 0xF000000000000000 -#define SATP64_ASID 0x0FFFF00000000000 -#define SATP64_PPN 0x00000FFFFFFFFFFF - -#define SATP_MODE_OFF 0 -#define SATP_MODE_SV32 1 -#define SATP_MODE_SV39 8 -#define SATP_MODE_SV48 9 -#define SATP_MODE_SV57 10 -#define SATP_MODE_SV64 11 - -#define PMP_R 0x01 -#define PMP_W 0x02 -#define PMP_X 0x04 -#define PMP_A 0x18 -#define PMP_L 0x80 -#define PMP_SHIFT 2 - -#define PMP_TOR 0x08 -#define PMP_NA4 0x10 -#define PMP_NAPOT 0x18 - -#define IRQ_S_SOFT 1 -#define IRQ_H_SOFT 2 
-#define IRQ_M_SOFT 3 -#define IRQ_S_TIMER 5 -#define IRQ_H_TIMER 6 -#define IRQ_M_TIMER 7 -#define IRQ_S_EXT 9 -#define IRQ_H_EXT 10 -#define IRQ_M_EXT 11 -#define IRQ_COP 12 -#define IRQ_HOST 13 - -#define DEFAULT_RSTVEC 0x00001000 -#define CLINT_BASE 0x02000000 -#define CLINT_SIZE 0x000c0000 -#define EXT_IO_BASE 0x40000000 -#define DRAM_BASE 0x80000000 - -// page table entry (PTE) fields -#define PTE_V 0x001 // Valid -#define PTE_R 0x002 // Read -#define PTE_W 0x004 // Write -#define PTE_X 0x008 // Execute -#define PTE_U 0x010 // User -#define PTE_G 0x020 // Global -#define PTE_A 0x040 // Accessed -#define PTE_D 0x080 // Dirty -#define PTE_SOFT 0x300 // Reserved for Software - -#define PTE_PPN_SHIFT 10 - -#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) - -#ifdef __riscv - -#if __riscv_xlen == 64 -# define MSTATUS_SD MSTATUS64_SD -# define SSTATUS_SD SSTATUS64_SD -# define RISCV_PGLEVEL_BITS 9 -# define SATP_MODE SATP64_MODE -#else -# define MSTATUS_SD MSTATUS32_SD -# define SSTATUS_SD SSTATUS32_SD -# define RISCV_PGLEVEL_BITS 10 -# define SATP_MODE SATP32_MODE -#endif -#define RISCV_PGSHIFT 12 -#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) - -#ifndef __ASSEMBLER__ - -#ifdef __GNUC__ - -#define read_csr(reg) ({ unsigned long __tmp; \ - asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ - __tmp; }) - -#define write_csr(reg, val) ({ \ - asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) - -#define swap_csr(reg, val) ({ unsigned long __tmp; \ - asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ - __tmp; }) - -#define set_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define clear_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define rdtime() read_csr(time) -#define rdcycle() read_csr(cycle) -#define rdinstret() read_csr(instret) - -#endif - -#endif - -#endif - -#endif 
-/* Automatically generated by parse-opcodes. */ -#ifndef RISCV_ENCODING_H -#define RISCV_ENCODING_H -#define MATCH_BEQ 0x63 -#define MASK_BEQ 0x707f -#define MATCH_BNE 0x1063 -#define MASK_BNE 0x707f -#define MATCH_BLT 0x4063 -#define MASK_BLT 0x707f -#define MATCH_BGE 0x5063 -#define MASK_BGE 0x707f -#define MATCH_BLTU 0x6063 -#define MASK_BLTU 0x707f -#define MATCH_BGEU 0x7063 -#define MASK_BGEU 0x707f -#define MATCH_JALR 0x67 -#define MASK_JALR 0x707f -#define MATCH_JAL 0x6f -#define MASK_JAL 0x7f -#define MATCH_LUI 0x37 -#define MASK_LUI 0x7f -#define MATCH_AUIPC 0x17 -#define MASK_AUIPC 0x7f -#define MATCH_ADDI 0x13 -#define MASK_ADDI 0x707f -#define MATCH_SLLI 0x1013 -#define MASK_SLLI 0xfc00707f -#define MATCH_SLTI 0x2013 -#define MASK_SLTI 0x707f -#define MATCH_SLTIU 0x3013 -#define MASK_SLTIU 0x707f -#define MATCH_XORI 0x4013 -#define MASK_XORI 0x707f -#define MATCH_SRLI 0x5013 -#define MASK_SRLI 0xfc00707f -#define MATCH_SRAI 0x40005013 -#define MASK_SRAI 0xfc00707f -#define MATCH_ORI 0x6013 -#define MASK_ORI 0x707f -#define MATCH_ANDI 0x7013 -#define MASK_ANDI 0x707f -#define MATCH_ADD 0x33 -#define MASK_ADD 0xfe00707f -#define MATCH_SUB 0x40000033 -#define MASK_SUB 0xfe00707f -#define MATCH_SLL 0x1033 -#define MASK_SLL 0xfe00707f -#define MATCH_SLT 0x2033 -#define MASK_SLT 0xfe00707f -#define MATCH_SLTU 0x3033 -#define MASK_SLTU 0xfe00707f -#define MATCH_XOR 0x4033 -#define MASK_XOR 0xfe00707f -#define MATCH_SRL 0x5033 -#define MASK_SRL 0xfe00707f -#define MATCH_SRA 0x40005033 -#define MASK_SRA 0xfe00707f -#define MATCH_OR 0x6033 -#define MASK_OR 0xfe00707f -#define MATCH_AND 0x7033 -#define MASK_AND 0xfe00707f -#define MATCH_ADDIW 0x1b -#define MASK_ADDIW 0x707f -#define MATCH_SLLIW 0x101b -#define MASK_SLLIW 0xfe00707f -#define MATCH_SRLIW 0x501b -#define MASK_SRLIW 0xfe00707f -#define MATCH_SRAIW 0x4000501b -#define MASK_SRAIW 0xfe00707f -#define MATCH_ADDW 0x3b -#define MASK_ADDW 0xfe00707f -#define MATCH_SUBW 0x4000003b -#define MASK_SUBW 
0xfe00707f -#define MATCH_SLLW 0x103b -#define MASK_SLLW 0xfe00707f -#define MATCH_SRLW 0x503b -#define MASK_SRLW 0xfe00707f -#define MATCH_SRAW 0x4000503b -#define MASK_SRAW 0xfe00707f -#define MATCH_LB 0x3 -#define MASK_LB 0x707f -#define MATCH_LH 0x1003 -#define MASK_LH 0x707f -#define MATCH_LW 0x2003 -#define MASK_LW 0x707f -#define MATCH_LD 0x3003 -#define MASK_LD 0x707f -#define MATCH_LBU 0x4003 -#define MASK_LBU 0x707f -#define MATCH_LHU 0x5003 -#define MASK_LHU 0x707f -#define MATCH_LWU 0x6003 -#define MASK_LWU 0x707f -#define MATCH_SB 0x23 -#define MASK_SB 0x707f -#define MATCH_SH 0x1023 -#define MASK_SH 0x707f -#define MATCH_SW 0x2023 -#define MASK_SW 0x707f -#define MATCH_SD 0x3023 -#define MASK_SD 0x707f -#define MATCH_FENCE 0xf -#define MASK_FENCE 0x707f -#define MATCH_FENCE_I 0x100f -#define MASK_FENCE_I 0x707f -#define MATCH_MUL 0x2000033 -#define MASK_MUL 0xfe00707f -#define MATCH_MULH 0x2001033 -#define MASK_MULH 0xfe00707f -#define MATCH_MULHSU 0x2002033 -#define MASK_MULHSU 0xfe00707f -#define MATCH_MULHU 0x2003033 -#define MASK_MULHU 0xfe00707f -#define MATCH_DIV 0x2004033 -#define MASK_DIV 0xfe00707f -#define MATCH_DIVU 0x2005033 -#define MASK_DIVU 0xfe00707f -#define MATCH_REM 0x2006033 -#define MASK_REM 0xfe00707f -#define MATCH_REMU 0x2007033 -#define MASK_REMU 0xfe00707f -#define MATCH_MULW 0x200003b -#define MASK_MULW 0xfe00707f -#define MATCH_DIVW 0x200403b -#define MASK_DIVW 0xfe00707f -#define MATCH_DIVUW 0x200503b -#define MASK_DIVUW 0xfe00707f -#define MATCH_REMW 0x200603b -#define MASK_REMW 0xfe00707f -#define MATCH_REMUW 0x200703b -#define MASK_REMUW 0xfe00707f -#define MATCH_AMOADD_W 0x202f -#define MASK_AMOADD_W 0xf800707f -#define MATCH_AMOXOR_W 0x2000202f -#define MASK_AMOXOR_W 0xf800707f -#define MATCH_AMOOR_W 0x4000202f -#define MASK_AMOOR_W 0xf800707f -#define MATCH_AMOAND_W 0x6000202f -#define MASK_AMOAND_W 0xf800707f -#define MATCH_AMOMIN_W 0x8000202f -#define MASK_AMOMIN_W 0xf800707f -#define MATCH_AMOMAX_W 0xa000202f 
-#define MASK_AMOMAX_W 0xf800707f -#define MATCH_AMOMINU_W 0xc000202f -#define MASK_AMOMINU_W 0xf800707f -#define MATCH_AMOMAXU_W 0xe000202f -#define MASK_AMOMAXU_W 0xf800707f -#define MATCH_AMOSWAP_W 0x800202f -#define MASK_AMOSWAP_W 0xf800707f -#define MATCH_LR_W 0x1000202f -#define MASK_LR_W 0xf9f0707f -#define MATCH_SC_W 0x1800202f -#define MASK_SC_W 0xf800707f -#define MATCH_AMOADD_D 0x302f -#define MASK_AMOADD_D 0xf800707f -#define MATCH_AMOXOR_D 0x2000302f -#define MASK_AMOXOR_D 0xf800707f -#define MATCH_AMOOR_D 0x4000302f -#define MASK_AMOOR_D 0xf800707f -#define MATCH_AMOAND_D 0x6000302f -#define MASK_AMOAND_D 0xf800707f -#define MATCH_AMOMIN_D 0x8000302f -#define MASK_AMOMIN_D 0xf800707f -#define MATCH_AMOMAX_D 0xa000302f -#define MASK_AMOMAX_D 0xf800707f -#define MATCH_AMOMINU_D 0xc000302f -#define MASK_AMOMINU_D 0xf800707f -#define MATCH_AMOMAXU_D 0xe000302f -#define MASK_AMOMAXU_D 0xf800707f -#define MATCH_AMOSWAP_D 0x800302f -#define MASK_AMOSWAP_D 0xf800707f -#define MATCH_LR_D 0x1000302f -#define MASK_LR_D 0xf9f0707f -#define MATCH_SC_D 0x1800302f -#define MASK_SC_D 0xf800707f -#define MATCH_ECALL 0x73 -#define MASK_ECALL 0xffffffff -#define MATCH_EBREAK 0x100073 -#define MASK_EBREAK 0xffffffff -#define MATCH_URET 0x200073 -#define MASK_URET 0xffffffff -#define MATCH_SRET 0x10200073 -#define MASK_SRET 0xffffffff -#define MATCH_MRET 0x30200073 -#define MASK_MRET 0xffffffff -#define MATCH_DRET 0x7b200073 -#define MASK_DRET 0xffffffff -#define MATCH_SFENCE_VMA 0x12000073 -#define MASK_SFENCE_VMA 0xfe007fff -#define MATCH_WFI 0x10500073 -#define MASK_WFI 0xffffffff -#define MATCH_CSRRW 0x1073 -#define MASK_CSRRW 0x707f -#define MATCH_CSRRS 0x2073 -#define MASK_CSRRS 0x707f -#define MATCH_CSRRC 0x3073 -#define MASK_CSRRC 0x707f -#define MATCH_CSRRWI 0x5073 -#define MASK_CSRRWI 0x707f -#define MATCH_CSRRSI 0x6073 -#define MASK_CSRRSI 0x707f -#define MATCH_CSRRCI 0x7073 -#define MASK_CSRRCI 0x707f -#define MATCH_FADD_S 0x53 -#define MASK_FADD_S 0xfe00007f 
-#define MATCH_FSUB_S 0x8000053 -#define MASK_FSUB_S 0xfe00007f -#define MATCH_FMUL_S 0x10000053 -#define MASK_FMUL_S 0xfe00007f -#define MATCH_FDIV_S 0x18000053 -#define MASK_FDIV_S 0xfe00007f -#define MATCH_FSGNJ_S 0x20000053 -#define MASK_FSGNJ_S 0xfe00707f -#define MATCH_FSGNJN_S 0x20001053 -#define MASK_FSGNJN_S 0xfe00707f -#define MATCH_FSGNJX_S 0x20002053 -#define MASK_FSGNJX_S 0xfe00707f -#define MATCH_FMIN_S 0x28000053 -#define MASK_FMIN_S 0xfe00707f -#define MATCH_FMAX_S 0x28001053 -#define MASK_FMAX_S 0xfe00707f -#define MATCH_FSQRT_S 0x58000053 -#define MASK_FSQRT_S 0xfff0007f -#define MATCH_FADD_D 0x2000053 -#define MASK_FADD_D 0xfe00007f -#define MATCH_FSUB_D 0xa000053 -#define MASK_FSUB_D 0xfe00007f -#define MATCH_FMUL_D 0x12000053 -#define MASK_FMUL_D 0xfe00007f -#define MATCH_FDIV_D 0x1a000053 -#define MASK_FDIV_D 0xfe00007f -#define MATCH_FSGNJ_D 0x22000053 -#define MASK_FSGNJ_D 0xfe00707f -#define MATCH_FSGNJN_D 0x22001053 -#define MASK_FSGNJN_D 0xfe00707f -#define MATCH_FSGNJX_D 0x22002053 -#define MASK_FSGNJX_D 0xfe00707f -#define MATCH_FMIN_D 0x2a000053 -#define MASK_FMIN_D 0xfe00707f -#define MATCH_FMAX_D 0x2a001053 -#define MASK_FMAX_D 0xfe00707f -#define MATCH_FCVT_S_D 0x40100053 -#define MASK_FCVT_S_D 0xfff0007f -#define MATCH_FCVT_D_S 0x42000053 -#define MASK_FCVT_D_S 0xfff0007f -#define MATCH_FSQRT_D 0x5a000053 -#define MASK_FSQRT_D 0xfff0007f -#define MATCH_FADD_Q 0x6000053 -#define MASK_FADD_Q 0xfe00007f -#define MATCH_FSUB_Q 0xe000053 -#define MASK_FSUB_Q 0xfe00007f -#define MATCH_FMUL_Q 0x16000053 -#define MASK_FMUL_Q 0xfe00007f -#define MATCH_FDIV_Q 0x1e000053 -#define MASK_FDIV_Q 0xfe00007f -#define MATCH_FSGNJ_Q 0x26000053 -#define MASK_FSGNJ_Q 0xfe00707f -#define MATCH_FSGNJN_Q 0x26001053 -#define MASK_FSGNJN_Q 0xfe00707f -#define MATCH_FSGNJX_Q 0x26002053 -#define MASK_FSGNJX_Q 0xfe00707f -#define MATCH_FMIN_Q 0x2e000053 -#define MASK_FMIN_Q 0xfe00707f -#define MATCH_FMAX_Q 0x2e001053 -#define MASK_FMAX_Q 0xfe00707f -#define 
MATCH_FCVT_S_Q 0x40300053 -#define MASK_FCVT_S_Q 0xfff0007f -#define MATCH_FCVT_Q_S 0x46000053 -#define MASK_FCVT_Q_S 0xfff0007f -#define MATCH_FCVT_D_Q 0x42300053 -#define MASK_FCVT_D_Q 0xfff0007f -#define MATCH_FCVT_Q_D 0x46100053 -#define MASK_FCVT_Q_D 0xfff0007f -#define MATCH_FSQRT_Q 0x5e000053 -#define MASK_FSQRT_Q 0xfff0007f -#define MATCH_FLE_S 0xa0000053 -#define MASK_FLE_S 0xfe00707f -#define MATCH_FLT_S 0xa0001053 -#define MASK_FLT_S 0xfe00707f -#define MATCH_FEQ_S 0xa0002053 -#define MASK_FEQ_S 0xfe00707f -#define MATCH_FLE_D 0xa2000053 -#define MASK_FLE_D 0xfe00707f -#define MATCH_FLT_D 0xa2001053 -#define MASK_FLT_D 0xfe00707f -#define MATCH_FEQ_D 0xa2002053 -#define MASK_FEQ_D 0xfe00707f -#define MATCH_FLE_Q 0xa6000053 -#define MASK_FLE_Q 0xfe00707f -#define MATCH_FLT_Q 0xa6001053 -#define MASK_FLT_Q 0xfe00707f -#define MATCH_FEQ_Q 0xa6002053 -#define MASK_FEQ_Q 0xfe00707f -#define MATCH_FCVT_W_S 0xc0000053 -#define MASK_FCVT_W_S 0xfff0007f -#define MATCH_FCVT_WU_S 0xc0100053 -#define MASK_FCVT_WU_S 0xfff0007f -#define MATCH_FCVT_L_S 0xc0200053 -#define MASK_FCVT_L_S 0xfff0007f -#define MATCH_FCVT_LU_S 0xc0300053 -#define MASK_FCVT_LU_S 0xfff0007f -#define MATCH_FMV_X_W 0xe0000053 -#define MASK_FMV_X_W 0xfff0707f -#define MATCH_FCLASS_S 0xe0001053 -#define MASK_FCLASS_S 0xfff0707f -#define MATCH_FCVT_W_D 0xc2000053 -#define MASK_FCVT_W_D 0xfff0007f -#define MATCH_FCVT_WU_D 0xc2100053 -#define MASK_FCVT_WU_D 0xfff0007f -#define MATCH_FCVT_L_D 0xc2200053 -#define MASK_FCVT_L_D 0xfff0007f -#define MATCH_FCVT_LU_D 0xc2300053 -#define MASK_FCVT_LU_D 0xfff0007f -#define MATCH_FMV_X_D 0xe2000053 -#define MASK_FMV_X_D 0xfff0707f -#define MATCH_FCLASS_D 0xe2001053 -#define MASK_FCLASS_D 0xfff0707f -#define MATCH_FCVT_W_Q 0xc6000053 -#define MASK_FCVT_W_Q 0xfff0007f -#define MATCH_FCVT_WU_Q 0xc6100053 -#define MASK_FCVT_WU_Q 0xfff0007f -#define MATCH_FCVT_L_Q 0xc6200053 -#define MASK_FCVT_L_Q 0xfff0007f -#define MATCH_FCVT_LU_Q 0xc6300053 -#define 
MASK_FCVT_LU_Q 0xfff0007f -#define MATCH_FMV_X_Q 0xe6000053 -#define MASK_FMV_X_Q 0xfff0707f -#define MATCH_FCLASS_Q 0xe6001053 -#define MASK_FCLASS_Q 0xfff0707f -#define MATCH_FCVT_S_W 0xd0000053 -#define MASK_FCVT_S_W 0xfff0007f -#define MATCH_FCVT_S_WU 0xd0100053 -#define MASK_FCVT_S_WU 0xfff0007f -#define MATCH_FCVT_S_L 0xd0200053 -#define MASK_FCVT_S_L 0xfff0007f -#define MATCH_FCVT_S_LU 0xd0300053 -#define MASK_FCVT_S_LU 0xfff0007f -#define MATCH_FMV_W_X 0xf0000053 -#define MASK_FMV_W_X 0xfff0707f -#define MATCH_FCVT_D_W 0xd2000053 -#define MASK_FCVT_D_W 0xfff0007f -#define MATCH_FCVT_D_WU 0xd2100053 -#define MASK_FCVT_D_WU 0xfff0007f -#define MATCH_FCVT_D_L 0xd2200053 -#define MASK_FCVT_D_L 0xfff0007f -#define MATCH_FCVT_D_LU 0xd2300053 -#define MASK_FCVT_D_LU 0xfff0007f -#define MATCH_FMV_D_X 0xf2000053 -#define MASK_FMV_D_X 0xfff0707f -#define MATCH_FCVT_Q_W 0xd6000053 -#define MASK_FCVT_Q_W 0xfff0007f -#define MATCH_FCVT_Q_WU 0xd6100053 -#define MASK_FCVT_Q_WU 0xfff0007f -#define MATCH_FCVT_Q_L 0xd6200053 -#define MASK_FCVT_Q_L 0xfff0007f -#define MATCH_FCVT_Q_LU 0xd6300053 -#define MASK_FCVT_Q_LU 0xfff0007f -#define MATCH_FMV_Q_X 0xf6000053 -#define MASK_FMV_Q_X 0xfff0707f -#define MATCH_FLW 0x2007 -#define MASK_FLW 0x707f -#define MATCH_FLD 0x3007 -#define MASK_FLD 0x707f -#define MATCH_FLQ 0x4007 -#define MASK_FLQ 0x707f -#define MATCH_FSW 0x2027 -#define MASK_FSW 0x707f -#define MATCH_FSD 0x3027 -#define MASK_FSD 0x707f -#define MATCH_FSQ 0x4027 -#define MASK_FSQ 0x707f -#define MATCH_FMADD_S 0x43 -#define MASK_FMADD_S 0x600007f -#define MATCH_FMSUB_S 0x47 -#define MASK_FMSUB_S 0x600007f -#define MATCH_FNMSUB_S 0x4b -#define MASK_FNMSUB_S 0x600007f -#define MATCH_FNMADD_S 0x4f -#define MASK_FNMADD_S 0x600007f -#define MATCH_FMADD_D 0x2000043 -#define MASK_FMADD_D 0x600007f -#define MATCH_FMSUB_D 0x2000047 -#define MASK_FMSUB_D 0x600007f -#define MATCH_FNMSUB_D 0x200004b -#define MASK_FNMSUB_D 0x600007f -#define MATCH_FNMADD_D 0x200004f -#define 
MASK_FNMADD_D 0x600007f -#define MATCH_FMADD_Q 0x6000043 -#define MASK_FMADD_Q 0x600007f -#define MATCH_FMSUB_Q 0x6000047 -#define MASK_FMSUB_Q 0x600007f -#define MATCH_FNMSUB_Q 0x600004b -#define MASK_FNMSUB_Q 0x600007f -#define MATCH_FNMADD_Q 0x600004f -#define MASK_FNMADD_Q 0x600007f -#define MATCH_C_NOP 0x1 -#define MASK_C_NOP 0xffff -#define MATCH_C_ADDI16SP 0x6101 -#define MASK_C_ADDI16SP 0xef83 -#define MATCH_C_JR 0x8002 -#define MASK_C_JR 0xf07f -#define MATCH_C_JALR 0x9002 -#define MASK_C_JALR 0xf07f -#define MATCH_C_EBREAK 0x9002 -#define MASK_C_EBREAK 0xffff -#define MATCH_C_LD 0x6000 -#define MASK_C_LD 0xe003 -#define MATCH_C_SD 0xe000 -#define MASK_C_SD 0xe003 -#define MATCH_C_ADDIW 0x2001 -#define MASK_C_ADDIW 0xe003 -#define MATCH_C_LDSP 0x6002 -#define MASK_C_LDSP 0xe003 -#define MATCH_C_SDSP 0xe002 -#define MASK_C_SDSP 0xe003 -#define MATCH_C_ADDI4SPN 0x0 -#define MASK_C_ADDI4SPN 0xe003 -#define MATCH_C_FLD 0x2000 -#define MASK_C_FLD 0xe003 -#define MATCH_C_LW 0x4000 -#define MASK_C_LW 0xe003 -#define MATCH_C_FLW 0x6000 -#define MASK_C_FLW 0xe003 -#define MATCH_C_FSD 0xa000 -#define MASK_C_FSD 0xe003 -#define MATCH_C_SW 0xc000 -#define MASK_C_SW 0xe003 -#define MATCH_C_FSW 0xe000 -#define MASK_C_FSW 0xe003 -#define MATCH_C_ADDI 0x1 -#define MASK_C_ADDI 0xe003 -#define MATCH_C_JAL 0x2001 -#define MASK_C_JAL 0xe003 -#define MATCH_C_LI 0x4001 -#define MASK_C_LI 0xe003 -#define MATCH_C_LUI 0x6001 -#define MASK_C_LUI 0xe003 -#define MATCH_C_SRLI 0x8001 -#define MASK_C_SRLI 0xec03 -#define MATCH_C_SRAI 0x8401 -#define MASK_C_SRAI 0xec03 -#define MATCH_C_ANDI 0x8801 -#define MASK_C_ANDI 0xec03 -#define MATCH_C_SUB 0x8c01 -#define MASK_C_SUB 0xfc63 -#define MATCH_C_XOR 0x8c21 -#define MASK_C_XOR 0xfc63 -#define MATCH_C_OR 0x8c41 -#define MASK_C_OR 0xfc63 -#define MATCH_C_AND 0x8c61 -#define MASK_C_AND 0xfc63 -#define MATCH_C_SUBW 0x9c01 -#define MASK_C_SUBW 0xfc63 -#define MATCH_C_ADDW 0x9c21 -#define MASK_C_ADDW 0xfc63 -#define MATCH_C_J 0xa001 -#define 
MASK_C_J 0xe003 -#define MATCH_C_BEQZ 0xc001 -#define MASK_C_BEQZ 0xe003 -#define MATCH_C_BNEZ 0xe001 -#define MASK_C_BNEZ 0xe003 -#define MATCH_C_SLLI 0x2 -#define MASK_C_SLLI 0xe003 -#define MATCH_C_FLDSP 0x2002 -#define MASK_C_FLDSP 0xe003 -#define MATCH_C_LWSP 0x4002 -#define MASK_C_LWSP 0xe003 -#define MATCH_C_FLWSP 0x6002 -#define MASK_C_FLWSP 0xe003 -#define MATCH_C_MV 0x8002 -#define MASK_C_MV 0xf003 -#define MATCH_C_ADD 0x9002 -#define MASK_C_ADD 0xf003 -#define MATCH_C_FSDSP 0xa002 -#define MASK_C_FSDSP 0xe003 -#define MATCH_C_SWSP 0xc002 -#define MASK_C_SWSP 0xe003 -#define MATCH_C_FSWSP 0xe002 -#define MASK_C_FSWSP 0xe003 -#define MATCH_CUSTOM0 0xb -#define MASK_CUSTOM0 0x707f -#define MATCH_CUSTOM0_RS1 0x200b -#define MASK_CUSTOM0_RS1 0x707f -#define MATCH_CUSTOM0_RS1_RS2 0x300b -#define MASK_CUSTOM0_RS1_RS2 0x707f -#define MATCH_CUSTOM0_RD 0x400b -#define MASK_CUSTOM0_RD 0x707f -#define MATCH_CUSTOM0_RD_RS1 0x600b -#define MASK_CUSTOM0_RD_RS1 0x707f -#define MATCH_CUSTOM0_RD_RS1_RS2 0x700b -#define MASK_CUSTOM0_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM1 0x2b -#define MASK_CUSTOM1 0x707f -#define MATCH_CUSTOM1_RS1 0x202b -#define MASK_CUSTOM1_RS1 0x707f -#define MATCH_CUSTOM1_RS1_RS2 0x302b -#define MASK_CUSTOM1_RS1_RS2 0x707f -#define MATCH_CUSTOM1_RD 0x402b -#define MASK_CUSTOM1_RD 0x707f -#define MATCH_CUSTOM1_RD_RS1 0x602b -#define MASK_CUSTOM1_RD_RS1 0x707f -#define MATCH_CUSTOM1_RD_RS1_RS2 0x702b -#define MASK_CUSTOM1_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM2 0x5b -#define MASK_CUSTOM2 0x707f -#define MATCH_CUSTOM2_RS1 0x205b -#define MASK_CUSTOM2_RS1 0x707f -#define MATCH_CUSTOM2_RS1_RS2 0x305b -#define MASK_CUSTOM2_RS1_RS2 0x707f -#define MATCH_CUSTOM2_RD 0x405b -#define MASK_CUSTOM2_RD 0x707f -#define MATCH_CUSTOM2_RD_RS1 0x605b -#define MASK_CUSTOM2_RD_RS1 0x707f -#define MATCH_CUSTOM2_RD_RS1_RS2 0x705b -#define MASK_CUSTOM2_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM3 0x7b -#define MASK_CUSTOM3 0x707f -#define MATCH_CUSTOM3_RS1 0x207b -#define 
MASK_CUSTOM3_RS1 0x707f -#define MATCH_CUSTOM3_RS1_RS2 0x307b -#define MASK_CUSTOM3_RS1_RS2 0x707f -#define MATCH_CUSTOM3_RD 0x407b -#define MASK_CUSTOM3_RD 0x707f -#define MATCH_CUSTOM3_RD_RS1 0x607b -#define MASK_CUSTOM3_RD_RS1 0x707f -#define MATCH_CUSTOM3_RD_RS1_RS2 0x707b -#define MASK_CUSTOM3_RD_RS1_RS2 0x707f -#define CSR_FFLAGS 0x1 -#define CSR_FRM 0x2 -#define CSR_FCSR 0x3 -#define CSR_CYCLE 0xc00 -#define CSR_TIME 0xc01 -#define CSR_INSTRET 0xc02 -#define CSR_HPMCOUNTER3 0xc03 -#define CSR_HPMCOUNTER4 0xc04 -#define CSR_HPMCOUNTER5 0xc05 -#define CSR_HPMCOUNTER6 0xc06 -#define CSR_HPMCOUNTER7 0xc07 -#define CSR_HPMCOUNTER8 0xc08 -#define CSR_HPMCOUNTER9 0xc09 -#define CSR_HPMCOUNTER10 0xc0a -#define CSR_HPMCOUNTER11 0xc0b -#define CSR_HPMCOUNTER12 0xc0c -#define CSR_HPMCOUNTER13 0xc0d -#define CSR_HPMCOUNTER14 0xc0e -#define CSR_HPMCOUNTER15 0xc0f -#define CSR_HPMCOUNTER16 0xc10 -#define CSR_HPMCOUNTER17 0xc11 -#define CSR_HPMCOUNTER18 0xc12 -#define CSR_HPMCOUNTER19 0xc13 -#define CSR_HPMCOUNTER20 0xc14 -#define CSR_HPMCOUNTER21 0xc15 -#define CSR_HPMCOUNTER22 0xc16 -#define CSR_HPMCOUNTER23 0xc17 -#define CSR_HPMCOUNTER24 0xc18 -#define CSR_HPMCOUNTER25 0xc19 -#define CSR_HPMCOUNTER26 0xc1a -#define CSR_HPMCOUNTER27 0xc1b -#define CSR_HPMCOUNTER28 0xc1c -#define CSR_HPMCOUNTER29 0xc1d -#define CSR_HPMCOUNTER30 0xc1e -#define CSR_HPMCOUNTER31 0xc1f -#define CSR_SSTATUS 0x100 -#define CSR_SIE 0x104 -#define CSR_STVEC 0x105 -#define CSR_SCOUNTEREN 0x106 -#define CSR_SSCRATCH 0x140 -#define CSR_SEPC 0x141 -#define CSR_SCAUSE 0x142 -#define CSR_STVAL 0x143 -#define CSR_SIP 0x144 -#define CSR_SATP 0x180 -#define CSR_MSTATUS 0x300 -#define CSR_MISA 0x301 -#define CSR_MEDELEG 0x302 -#define CSR_MIDELEG 0x303 -#define CSR_MIE 0x304 -#define CSR_MTVEC 0x305 -#define CSR_MCOUNTEREN 0x306 -#define CSR_MSCRATCH 0x340 -#define CSR_MEPC 0x341 -#define CSR_MCAUSE 0x342 -#define CSR_MTVAL 0x343 -#define CSR_MIP 0x344 -#define CSR_PMPCFG0 0x3a0 -#define CSR_PMPCFG1 0x3a1 
-#define CSR_PMPCFG2 0x3a2 -#define CSR_PMPCFG3 0x3a3 -#define CSR_PMPADDR0 0x3b0 -#define CSR_PMPADDR1 0x3b1 -#define CSR_PMPADDR2 0x3b2 -#define CSR_PMPADDR3 0x3b3 -#define CSR_PMPADDR4 0x3b4 -#define CSR_PMPADDR5 0x3b5 -#define CSR_PMPADDR6 0x3b6 -#define CSR_PMPADDR7 0x3b7 -#define CSR_PMPADDR8 0x3b8 -#define CSR_PMPADDR9 0x3b9 -#define CSR_PMPADDR10 0x3ba -#define CSR_PMPADDR11 0x3bb -#define CSR_PMPADDR12 0x3bc -#define CSR_PMPADDR13 0x3bd -#define CSR_PMPADDR14 0x3be -#define CSR_PMPADDR15 0x3bf -#define CSR_TSELECT 0x7a0 -#define CSR_TDATA1 0x7a1 -#define CSR_TDATA2 0x7a2 -#define CSR_TDATA3 0x7a3 -#define CSR_DCSR 0x7b0 -#define CSR_DPC 0x7b1 -#define CSR_DSCRATCH 0x7b2 -#define CSR_MCYCLE 0xb00 -#define CSR_MINSTRET 0xb02 -#define CSR_MHPMCOUNTER3 0xb03 -#define CSR_MHPMCOUNTER4 0xb04 -#define CSR_MHPMCOUNTER5 0xb05 -#define CSR_MHPMCOUNTER6 0xb06 -#define CSR_MHPMCOUNTER7 0xb07 -#define CSR_MHPMCOUNTER8 0xb08 -#define CSR_MHPMCOUNTER9 0xb09 -#define CSR_MHPMCOUNTER10 0xb0a -#define CSR_MHPMCOUNTER11 0xb0b -#define CSR_MHPMCOUNTER12 0xb0c -#define CSR_MHPMCOUNTER13 0xb0d -#define CSR_MHPMCOUNTER14 0xb0e -#define CSR_MHPMCOUNTER15 0xb0f -#define CSR_MHPMCOUNTER16 0xb10 -#define CSR_MHPMCOUNTER17 0xb11 -#define CSR_MHPMCOUNTER18 0xb12 -#define CSR_MHPMCOUNTER19 0xb13 -#define CSR_MHPMCOUNTER20 0xb14 -#define CSR_MHPMCOUNTER21 0xb15 -#define CSR_MHPMCOUNTER22 0xb16 -#define CSR_MHPMCOUNTER23 0xb17 -#define CSR_MHPMCOUNTER24 0xb18 -#define CSR_MHPMCOUNTER25 0xb19 -#define CSR_MHPMCOUNTER26 0xb1a -#define CSR_MHPMCOUNTER27 0xb1b -#define CSR_MHPMCOUNTER28 0xb1c -#define CSR_MHPMCOUNTER29 0xb1d -#define CSR_MHPMCOUNTER30 0xb1e -#define CSR_MHPMCOUNTER31 0xb1f -#define CSR_MHPMEVENT3 0x323 -#define CSR_MHPMEVENT4 0x324 -#define CSR_MHPMEVENT5 0x325 -#define CSR_MHPMEVENT6 0x326 -#define CSR_MHPMEVENT7 0x327 -#define CSR_MHPMEVENT8 0x328 -#define CSR_MHPMEVENT9 0x329 -#define CSR_MHPMEVENT10 0x32a -#define CSR_MHPMEVENT11 0x32b -#define CSR_MHPMEVENT12 0x32c 
-#define CSR_MHPMEVENT13 0x32d -#define CSR_MHPMEVENT14 0x32e -#define CSR_MHPMEVENT15 0x32f -#define CSR_MHPMEVENT16 0x330 -#define CSR_MHPMEVENT17 0x331 -#define CSR_MHPMEVENT18 0x332 -#define CSR_MHPMEVENT19 0x333 -#define CSR_MHPMEVENT20 0x334 -#define CSR_MHPMEVENT21 0x335 -#define CSR_MHPMEVENT22 0x336 -#define CSR_MHPMEVENT23 0x337 -#define CSR_MHPMEVENT24 0x338 -#define CSR_MHPMEVENT25 0x339 -#define CSR_MHPMEVENT26 0x33a -#define CSR_MHPMEVENT27 0x33b -#define CSR_MHPMEVENT28 0x33c -#define CSR_MHPMEVENT29 0x33d -#define CSR_MHPMEVENT30 0x33e -#define CSR_MHPMEVENT31 0x33f -#define CSR_MVENDORID 0xf11 -#define CSR_MARCHID 0xf12 -#define CSR_MIMPID 0xf13 -#define CSR_MHARTID 0xf14 -#define CSR_CYCLEH 0xc80 -#define CSR_TIMEH 0xc81 -#define CSR_INSTRETH 0xc82 -#define CSR_HPMCOUNTER3H 0xc83 -#define CSR_HPMCOUNTER4H 0xc84 -#define CSR_HPMCOUNTER5H 0xc85 -#define CSR_HPMCOUNTER6H 0xc86 -#define CSR_HPMCOUNTER7H 0xc87 -#define CSR_HPMCOUNTER8H 0xc88 -#define CSR_HPMCOUNTER9H 0xc89 -#define CSR_HPMCOUNTER10H 0xc8a -#define CSR_HPMCOUNTER11H 0xc8b -#define CSR_HPMCOUNTER12H 0xc8c -#define CSR_HPMCOUNTER13H 0xc8d -#define CSR_HPMCOUNTER14H 0xc8e -#define CSR_HPMCOUNTER15H 0xc8f -#define CSR_HPMCOUNTER16H 0xc90 -#define CSR_HPMCOUNTER17H 0xc91 -#define CSR_HPMCOUNTER18H 0xc92 -#define CSR_HPMCOUNTER19H 0xc93 -#define CSR_HPMCOUNTER20H 0xc94 -#define CSR_HPMCOUNTER21H 0xc95 -#define CSR_HPMCOUNTER22H 0xc96 -#define CSR_HPMCOUNTER23H 0xc97 -#define CSR_HPMCOUNTER24H 0xc98 -#define CSR_HPMCOUNTER25H 0xc99 -#define CSR_HPMCOUNTER26H 0xc9a -#define CSR_HPMCOUNTER27H 0xc9b -#define CSR_HPMCOUNTER28H 0xc9c -#define CSR_HPMCOUNTER29H 0xc9d -#define CSR_HPMCOUNTER30H 0xc9e -#define CSR_HPMCOUNTER31H 0xc9f -#define CSR_MCYCLEH 0xb80 -#define CSR_MINSTRETH 0xb82 -#define CSR_MHPMCOUNTER3H 0xb83 -#define CSR_MHPMCOUNTER4H 0xb84 -#define CSR_MHPMCOUNTER5H 0xb85 -#define CSR_MHPMCOUNTER6H 0xb86 -#define CSR_MHPMCOUNTER7H 0xb87 -#define CSR_MHPMCOUNTER8H 0xb88 -#define 
CSR_MHPMCOUNTER9H 0xb89 -#define CSR_MHPMCOUNTER10H 0xb8a -#define CSR_MHPMCOUNTER11H 0xb8b -#define CSR_MHPMCOUNTER12H 0xb8c -#define CSR_MHPMCOUNTER13H 0xb8d -#define CSR_MHPMCOUNTER14H 0xb8e -#define CSR_MHPMCOUNTER15H 0xb8f -#define CSR_MHPMCOUNTER16H 0xb90 -#define CSR_MHPMCOUNTER17H 0xb91 -#define CSR_MHPMCOUNTER18H 0xb92 -#define CSR_MHPMCOUNTER19H 0xb93 -#define CSR_MHPMCOUNTER20H 0xb94 -#define CSR_MHPMCOUNTER21H 0xb95 -#define CSR_MHPMCOUNTER22H 0xb96 -#define CSR_MHPMCOUNTER23H 0xb97 -#define CSR_MHPMCOUNTER24H 0xb98 -#define CSR_MHPMCOUNTER25H 0xb99 -#define CSR_MHPMCOUNTER26H 0xb9a -#define CSR_MHPMCOUNTER27H 0xb9b -#define CSR_MHPMCOUNTER28H 0xb9c -#define CSR_MHPMCOUNTER29H 0xb9d -#define CSR_MHPMCOUNTER30H 0xb9e -#define CSR_MHPMCOUNTER31H 0xb9f -#define CAUSE_MISALIGNED_FETCH 0x0 -#define CAUSE_FETCH_ACCESS 0x1 -#define CAUSE_ILLEGAL_INSTRUCTION 0x2 -#define CAUSE_BREAKPOINT 0x3 -#define CAUSE_MISALIGNED_LOAD 0x4 -#define CAUSE_LOAD_ACCESS 0x5 -#define CAUSE_MISALIGNED_STORE 0x6 -#define CAUSE_STORE_ACCESS 0x7 -#define CAUSE_USER_ECALL 0x8 -#define CAUSE_SUPERVISOR_ECALL 0x9 -#define CAUSE_HYPERVISOR_ECALL 0xa -#define CAUSE_MACHINE_ECALL 0xb -#define CAUSE_FETCH_PAGE_FAULT 0xc -#define CAUSE_LOAD_PAGE_FAULT 0xd -#define CAUSE_STORE_PAGE_FAULT 0xf -#endif -#ifdef DECLARE_INSN -DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) -DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) -DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) -DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) -DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) -DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) -DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) -DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) -DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) -DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) -DECLARE_INSN(addi, MATCH_ADDI, MASK_ADDI) -DECLARE_INSN(slli, MATCH_SLLI, MASK_SLLI) -DECLARE_INSN(slti, MATCH_SLTI, MASK_SLTI) -DECLARE_INSN(sltiu, MATCH_SLTIU, MASK_SLTIU) -DECLARE_INSN(xori, MATCH_XORI, MASK_XORI) -DECLARE_INSN(srli, MATCH_SRLI, MASK_SRLI) 
-DECLARE_INSN(srai, MATCH_SRAI, MASK_SRAI) -DECLARE_INSN(ori, MATCH_ORI, MASK_ORI) -DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) -DECLARE_INSN(add, MATCH_ADD, MASK_ADD) -DECLARE_INSN(sub, MATCH_SUB, MASK_SUB) -DECLARE_INSN(sll, MATCH_SLL, MASK_SLL) -DECLARE_INSN(slt, MATCH_SLT, MASK_SLT) -DECLARE_INSN(sltu, MATCH_SLTU, MASK_SLTU) -DECLARE_INSN(xor, MATCH_XOR, MASK_XOR) -DECLARE_INSN(srl, MATCH_SRL, MASK_SRL) -DECLARE_INSN(sra, MATCH_SRA, MASK_SRA) -DECLARE_INSN(or, MATCH_OR, MASK_OR) -DECLARE_INSN(and, MATCH_AND, MASK_AND) -DECLARE_INSN(addiw, MATCH_ADDIW, MASK_ADDIW) -DECLARE_INSN(slliw, MATCH_SLLIW, MASK_SLLIW) -DECLARE_INSN(srliw, MATCH_SRLIW, MASK_SRLIW) -DECLARE_INSN(sraiw, MATCH_SRAIW, MASK_SRAIW) -DECLARE_INSN(addw, MATCH_ADDW, MASK_ADDW) -DECLARE_INSN(subw, MATCH_SUBW, MASK_SUBW) -DECLARE_INSN(sllw, MATCH_SLLW, MASK_SLLW) -DECLARE_INSN(srlw, MATCH_SRLW, MASK_SRLW) -DECLARE_INSN(sraw, MATCH_SRAW, MASK_SRAW) -DECLARE_INSN(lb, MATCH_LB, MASK_LB) -DECLARE_INSN(lh, MATCH_LH, MASK_LH) -DECLARE_INSN(lw, MATCH_LW, MASK_LW) -DECLARE_INSN(ld, MATCH_LD, MASK_LD) -DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) -DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) -DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) -DECLARE_INSN(sb, MATCH_SB, MASK_SB) -DECLARE_INSN(sh, MATCH_SH, MASK_SH) -DECLARE_INSN(sw, MATCH_SW, MASK_SW) -DECLARE_INSN(sd, MATCH_SD, MASK_SD) -DECLARE_INSN(fence, MATCH_FENCE, MASK_FENCE) -DECLARE_INSN(fence_i, MATCH_FENCE_I, MASK_FENCE_I) -DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) -DECLARE_INSN(mulh, MATCH_MULH, MASK_MULH) -DECLARE_INSN(mulhsu, MATCH_MULHSU, MASK_MULHSU) -DECLARE_INSN(mulhu, MATCH_MULHU, MASK_MULHU) -DECLARE_INSN(div, MATCH_DIV, MASK_DIV) -DECLARE_INSN(divu, MATCH_DIVU, MASK_DIVU) -DECLARE_INSN(rem, MATCH_REM, MASK_REM) -DECLARE_INSN(remu, MATCH_REMU, MASK_REMU) -DECLARE_INSN(mulw, MATCH_MULW, MASK_MULW) -DECLARE_INSN(divw, MATCH_DIVW, MASK_DIVW) -DECLARE_INSN(divuw, MATCH_DIVUW, MASK_DIVUW) -DECLARE_INSN(remw, MATCH_REMW, MASK_REMW) -DECLARE_INSN(remuw, MATCH_REMUW, 
MASK_REMUW) -DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) -DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) -DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) -DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) -DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) -DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) -DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) -DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) -DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) -DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) -DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) -DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) -DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) -DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) -DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) -DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) -DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) -DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) -DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) -DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) -DECLARE_INSN(lr_d, MATCH_LR_D, MASK_LR_D) -DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) -DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL) -DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK) -DECLARE_INSN(uret, MATCH_URET, MASK_URET) -DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) -DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) -DECLARE_INSN(dret, MATCH_DRET, MASK_DRET) -DECLARE_INSN(sfence_vma, MATCH_SFENCE_VMA, MASK_SFENCE_VMA) -DECLARE_INSN(wfi, MATCH_WFI, MASK_WFI) -DECLARE_INSN(csrrw, MATCH_CSRRW, MASK_CSRRW) -DECLARE_INSN(csrrs, MATCH_CSRRS, MASK_CSRRS) -DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) -DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) -DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) -DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) -DECLARE_INSN(fadd_s, MATCH_FADD_S, MASK_FADD_S) -DECLARE_INSN(fsub_s, MATCH_FSUB_S, MASK_FSUB_S) -DECLARE_INSN(fmul_s, MATCH_FMUL_S, MASK_FMUL_S) 
-DECLARE_INSN(fdiv_s, MATCH_FDIV_S, MASK_FDIV_S) -DECLARE_INSN(fsgnj_s, MATCH_FSGNJ_S, MASK_FSGNJ_S) -DECLARE_INSN(fsgnjn_s, MATCH_FSGNJN_S, MASK_FSGNJN_S) -DECLARE_INSN(fsgnjx_s, MATCH_FSGNJX_S, MASK_FSGNJX_S) -DECLARE_INSN(fmin_s, MATCH_FMIN_S, MASK_FMIN_S) -DECLARE_INSN(fmax_s, MATCH_FMAX_S, MASK_FMAX_S) -DECLARE_INSN(fsqrt_s, MATCH_FSQRT_S, MASK_FSQRT_S) -DECLARE_INSN(fadd_d, MATCH_FADD_D, MASK_FADD_D) -DECLARE_INSN(fsub_d, MATCH_FSUB_D, MASK_FSUB_D) -DECLARE_INSN(fmul_d, MATCH_FMUL_D, MASK_FMUL_D) -DECLARE_INSN(fdiv_d, MATCH_FDIV_D, MASK_FDIV_D) -DECLARE_INSN(fsgnj_d, MATCH_FSGNJ_D, MASK_FSGNJ_D) -DECLARE_INSN(fsgnjn_d, MATCH_FSGNJN_D, MASK_FSGNJN_D) -DECLARE_INSN(fsgnjx_d, MATCH_FSGNJX_D, MASK_FSGNJX_D) -DECLARE_INSN(fmin_d, MATCH_FMIN_D, MASK_FMIN_D) -DECLARE_INSN(fmax_d, MATCH_FMAX_D, MASK_FMAX_D) -DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) -DECLARE_INSN(fcvt_d_s, MATCH_FCVT_D_S, MASK_FCVT_D_S) -DECLARE_INSN(fsqrt_d, MATCH_FSQRT_D, MASK_FSQRT_D) -DECLARE_INSN(fadd_q, MATCH_FADD_Q, MASK_FADD_Q) -DECLARE_INSN(fsub_q, MATCH_FSUB_Q, MASK_FSUB_Q) -DECLARE_INSN(fmul_q, MATCH_FMUL_Q, MASK_FMUL_Q) -DECLARE_INSN(fdiv_q, MATCH_FDIV_Q, MASK_FDIV_Q) -DECLARE_INSN(fsgnj_q, MATCH_FSGNJ_Q, MASK_FSGNJ_Q) -DECLARE_INSN(fsgnjn_q, MATCH_FSGNJN_Q, MASK_FSGNJN_Q) -DECLARE_INSN(fsgnjx_q, MATCH_FSGNJX_Q, MASK_FSGNJX_Q) -DECLARE_INSN(fmin_q, MATCH_FMIN_Q, MASK_FMIN_Q) -DECLARE_INSN(fmax_q, MATCH_FMAX_Q, MASK_FMAX_Q) -DECLARE_INSN(fcvt_s_q, MATCH_FCVT_S_Q, MASK_FCVT_S_Q) -DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) -DECLARE_INSN(fcvt_d_q, MATCH_FCVT_D_Q, MASK_FCVT_D_Q) -DECLARE_INSN(fcvt_q_d, MATCH_FCVT_Q_D, MASK_FCVT_Q_D) -DECLARE_INSN(fsqrt_q, MATCH_FSQRT_Q, MASK_FSQRT_Q) -DECLARE_INSN(fle_s, MATCH_FLE_S, MASK_FLE_S) -DECLARE_INSN(flt_s, MATCH_FLT_S, MASK_FLT_S) -DECLARE_INSN(feq_s, MATCH_FEQ_S, MASK_FEQ_S) -DECLARE_INSN(fle_d, MATCH_FLE_D, MASK_FLE_D) -DECLARE_INSN(flt_d, MATCH_FLT_D, MASK_FLT_D) -DECLARE_INSN(feq_d, MATCH_FEQ_D, MASK_FEQ_D) 
-DECLARE_INSN(fle_q, MATCH_FLE_Q, MASK_FLE_Q) -DECLARE_INSN(flt_q, MATCH_FLT_Q, MASK_FLT_Q) -DECLARE_INSN(feq_q, MATCH_FEQ_Q, MASK_FEQ_Q) -DECLARE_INSN(fcvt_w_s, MATCH_FCVT_W_S, MASK_FCVT_W_S) -DECLARE_INSN(fcvt_wu_s, MATCH_FCVT_WU_S, MASK_FCVT_WU_S) -DECLARE_INSN(fcvt_l_s, MATCH_FCVT_L_S, MASK_FCVT_L_S) -DECLARE_INSN(fcvt_lu_s, MATCH_FCVT_LU_S, MASK_FCVT_LU_S) -DECLARE_INSN(fmv_x_w, MATCH_FMV_X_W, MASK_FMV_X_W) -DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) -DECLARE_INSN(fcvt_w_d, MATCH_FCVT_W_D, MASK_FCVT_W_D) -DECLARE_INSN(fcvt_wu_d, MATCH_FCVT_WU_D, MASK_FCVT_WU_D) -DECLARE_INSN(fcvt_l_d, MATCH_FCVT_L_D, MASK_FCVT_L_D) -DECLARE_INSN(fcvt_lu_d, MATCH_FCVT_LU_D, MASK_FCVT_LU_D) -DECLARE_INSN(fmv_x_d, MATCH_FMV_X_D, MASK_FMV_X_D) -DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) -DECLARE_INSN(fcvt_w_q, MATCH_FCVT_W_Q, MASK_FCVT_W_Q) -DECLARE_INSN(fcvt_wu_q, MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q) -DECLARE_INSN(fcvt_l_q, MATCH_FCVT_L_Q, MASK_FCVT_L_Q) -DECLARE_INSN(fcvt_lu_q, MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q) -DECLARE_INSN(fmv_x_q, MATCH_FMV_X_Q, MASK_FMV_X_Q) -DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) -DECLARE_INSN(fcvt_s_w, MATCH_FCVT_S_W, MASK_FCVT_S_W) -DECLARE_INSN(fcvt_s_wu, MATCH_FCVT_S_WU, MASK_FCVT_S_WU) -DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) -DECLARE_INSN(fcvt_s_lu, MATCH_FCVT_S_LU, MASK_FCVT_S_LU) -DECLARE_INSN(fmv_w_x, MATCH_FMV_W_X, MASK_FMV_W_X) -DECLARE_INSN(fcvt_d_w, MATCH_FCVT_D_W, MASK_FCVT_D_W) -DECLARE_INSN(fcvt_d_wu, MATCH_FCVT_D_WU, MASK_FCVT_D_WU) -DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) -DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) -DECLARE_INSN(fmv_d_x, MATCH_FMV_D_X, MASK_FMV_D_X) -DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) -DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) -DECLARE_INSN(fcvt_q_l, MATCH_FCVT_Q_L, MASK_FCVT_Q_L) -DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) -DECLARE_INSN(fmv_q_x, MATCH_FMV_Q_X, MASK_FMV_Q_X) -DECLARE_INSN(flw, 
MATCH_FLW, MASK_FLW) -DECLARE_INSN(fld, MATCH_FLD, MASK_FLD) -DECLARE_INSN(flq, MATCH_FLQ, MASK_FLQ) -DECLARE_INSN(fsw, MATCH_FSW, MASK_FSW) -DECLARE_INSN(fsd, MATCH_FSD, MASK_FSD) -DECLARE_INSN(fsq, MATCH_FSQ, MASK_FSQ) -DECLARE_INSN(fmadd_s, MATCH_FMADD_S, MASK_FMADD_S) -DECLARE_INSN(fmsub_s, MATCH_FMSUB_S, MASK_FMSUB_S) -DECLARE_INSN(fnmsub_s, MATCH_FNMSUB_S, MASK_FNMSUB_S) -DECLARE_INSN(fnmadd_s, MATCH_FNMADD_S, MASK_FNMADD_S) -DECLARE_INSN(fmadd_d, MATCH_FMADD_D, MASK_FMADD_D) -DECLARE_INSN(fmsub_d, MATCH_FMSUB_D, MASK_FMSUB_D) -DECLARE_INSN(fnmsub_d, MATCH_FNMSUB_D, MASK_FNMSUB_D) -DECLARE_INSN(fnmadd_d, MATCH_FNMADD_D, MASK_FNMADD_D) -DECLARE_INSN(fmadd_q, MATCH_FMADD_Q, MASK_FMADD_Q) -DECLARE_INSN(fmsub_q, MATCH_FMSUB_Q, MASK_FMSUB_Q) -DECLARE_INSN(fnmsub_q, MATCH_FNMSUB_Q, MASK_FNMSUB_Q) -DECLARE_INSN(fnmadd_q, MATCH_FNMADD_Q, MASK_FNMADD_Q) -DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) -DECLARE_INSN(c_addi16sp, MATCH_C_ADDI16SP, MASK_C_ADDI16SP) -DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) -DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) -DECLARE_INSN(c_ebreak, MATCH_C_EBREAK, MASK_C_EBREAK) -DECLARE_INSN(c_ld, MATCH_C_LD, MASK_C_LD) -DECLARE_INSN(c_sd, MATCH_C_SD, MASK_C_SD) -DECLARE_INSN(c_addiw, MATCH_C_ADDIW, MASK_C_ADDIW) -DECLARE_INSN(c_ldsp, MATCH_C_LDSP, MASK_C_LDSP) -DECLARE_INSN(c_sdsp, MATCH_C_SDSP, MASK_C_SDSP) -DECLARE_INSN(c_addi4spn, MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN) -DECLARE_INSN(c_fld, MATCH_C_FLD, MASK_C_FLD) -DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) -DECLARE_INSN(c_flw, MATCH_C_FLW, MASK_C_FLW) -DECLARE_INSN(c_fsd, MATCH_C_FSD, MASK_C_FSD) -DECLARE_INSN(c_sw, MATCH_C_SW, MASK_C_SW) -DECLARE_INSN(c_fsw, MATCH_C_FSW, MASK_C_FSW) -DECLARE_INSN(c_addi, MATCH_C_ADDI, MASK_C_ADDI) -DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) -DECLARE_INSN(c_li, MATCH_C_LI, MASK_C_LI) -DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) -DECLARE_INSN(c_srli, MATCH_C_SRLI, MASK_C_SRLI) -DECLARE_INSN(c_srai, MATCH_C_SRAI, MASK_C_SRAI) -DECLARE_INSN(c_andi, 
MATCH_C_ANDI, MASK_C_ANDI) -DECLARE_INSN(c_sub, MATCH_C_SUB, MASK_C_SUB) -DECLARE_INSN(c_xor, MATCH_C_XOR, MASK_C_XOR) -DECLARE_INSN(c_or, MATCH_C_OR, MASK_C_OR) -DECLARE_INSN(c_and, MATCH_C_AND, MASK_C_AND) -DECLARE_INSN(c_subw, MATCH_C_SUBW, MASK_C_SUBW) -DECLARE_INSN(c_addw, MATCH_C_ADDW, MASK_C_ADDW) -DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) -DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) -DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) -DECLARE_INSN(c_slli, MATCH_C_SLLI, MASK_C_SLLI) -DECLARE_INSN(c_fldsp, MATCH_C_FLDSP, MASK_C_FLDSP) -DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) -DECLARE_INSN(c_flwsp, MATCH_C_FLWSP, MASK_C_FLWSP) -DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) -DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) -DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) -DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) -DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) -DECLARE_INSN(custom0, MATCH_CUSTOM0, MASK_CUSTOM0) -DECLARE_INSN(custom0_rs1, MATCH_CUSTOM0_RS1, MASK_CUSTOM0_RS1) -DECLARE_INSN(custom0_rs1_rs2, MATCH_CUSTOM0_RS1_RS2, MASK_CUSTOM0_RS1_RS2) -DECLARE_INSN(custom0_rd, MATCH_CUSTOM0_RD, MASK_CUSTOM0_RD) -DECLARE_INSN(custom0_rd_rs1, MATCH_CUSTOM0_RD_RS1, MASK_CUSTOM0_RD_RS1) -DECLARE_INSN(custom0_rd_rs1_rs2, MATCH_CUSTOM0_RD_RS1_RS2, MASK_CUSTOM0_RD_RS1_RS2) -DECLARE_INSN(custom1, MATCH_CUSTOM1, MASK_CUSTOM1) -DECLARE_INSN(custom1_rs1, MATCH_CUSTOM1_RS1, MASK_CUSTOM1_RS1) -DECLARE_INSN(custom1_rs1_rs2, MATCH_CUSTOM1_RS1_RS2, MASK_CUSTOM1_RS1_RS2) -DECLARE_INSN(custom1_rd, MATCH_CUSTOM1_RD, MASK_CUSTOM1_RD) -DECLARE_INSN(custom1_rd_rs1, MATCH_CUSTOM1_RD_RS1, MASK_CUSTOM1_RD_RS1) -DECLARE_INSN(custom1_rd_rs1_rs2, MATCH_CUSTOM1_RD_RS1_RS2, MASK_CUSTOM1_RD_RS1_RS2) -DECLARE_INSN(custom2, MATCH_CUSTOM2, MASK_CUSTOM2) -DECLARE_INSN(custom2_rs1, MATCH_CUSTOM2_RS1, MASK_CUSTOM2_RS1) -DECLARE_INSN(custom2_rs1_rs2, MATCH_CUSTOM2_RS1_RS2, MASK_CUSTOM2_RS1_RS2) -DECLARE_INSN(custom2_rd, MATCH_CUSTOM2_RD, MASK_CUSTOM2_RD) -DECLARE_INSN(custom2_rd_rs1, 
MATCH_CUSTOM2_RD_RS1, MASK_CUSTOM2_RD_RS1) -DECLARE_INSN(custom2_rd_rs1_rs2, MATCH_CUSTOM2_RD_RS1_RS2, MASK_CUSTOM2_RD_RS1_RS2) -DECLARE_INSN(custom3, MATCH_CUSTOM3, MASK_CUSTOM3) -DECLARE_INSN(custom3_rs1, MATCH_CUSTOM3_RS1, MASK_CUSTOM3_RS1) -DECLARE_INSN(custom3_rs1_rs2, MATCH_CUSTOM3_RS1_RS2, MASK_CUSTOM3_RS1_RS2) -DECLARE_INSN(custom3_rd, MATCH_CUSTOM3_RD, MASK_CUSTOM3_RD) -DECLARE_INSN(custom3_rd_rs1, MATCH_CUSTOM3_RD_RS1, MASK_CUSTOM3_RD_RS1) -DECLARE_INSN(custom3_rd_rs1_rs2, MATCH_CUSTOM3_RD_RS1_RS2, MASK_CUSTOM3_RD_RS1_RS2) -#endif -#ifdef DECLARE_CSR -DECLARE_CSR(fflags, CSR_FFLAGS) -DECLARE_CSR(frm, CSR_FRM) -DECLARE_CSR(fcsr, CSR_FCSR) -DECLARE_CSR(cycle, CSR_CYCLE) -DECLARE_CSR(time, CSR_TIME) -DECLARE_CSR(instret, CSR_INSTRET) -DECLARE_CSR(hpmcounter3, CSR_HPMCOUNTER3) -DECLARE_CSR(hpmcounter4, CSR_HPMCOUNTER4) -DECLARE_CSR(hpmcounter5, CSR_HPMCOUNTER5) -DECLARE_CSR(hpmcounter6, CSR_HPMCOUNTER6) -DECLARE_CSR(hpmcounter7, CSR_HPMCOUNTER7) -DECLARE_CSR(hpmcounter8, CSR_HPMCOUNTER8) -DECLARE_CSR(hpmcounter9, CSR_HPMCOUNTER9) -DECLARE_CSR(hpmcounter10, CSR_HPMCOUNTER10) -DECLARE_CSR(hpmcounter11, CSR_HPMCOUNTER11) -DECLARE_CSR(hpmcounter12, CSR_HPMCOUNTER12) -DECLARE_CSR(hpmcounter13, CSR_HPMCOUNTER13) -DECLARE_CSR(hpmcounter14, CSR_HPMCOUNTER14) -DECLARE_CSR(hpmcounter15, CSR_HPMCOUNTER15) -DECLARE_CSR(hpmcounter16, CSR_HPMCOUNTER16) -DECLARE_CSR(hpmcounter17, CSR_HPMCOUNTER17) -DECLARE_CSR(hpmcounter18, CSR_HPMCOUNTER18) -DECLARE_CSR(hpmcounter19, CSR_HPMCOUNTER19) -DECLARE_CSR(hpmcounter20, CSR_HPMCOUNTER20) -DECLARE_CSR(hpmcounter21, CSR_HPMCOUNTER21) -DECLARE_CSR(hpmcounter22, CSR_HPMCOUNTER22) -DECLARE_CSR(hpmcounter23, CSR_HPMCOUNTER23) -DECLARE_CSR(hpmcounter24, CSR_HPMCOUNTER24) -DECLARE_CSR(hpmcounter25, CSR_HPMCOUNTER25) -DECLARE_CSR(hpmcounter26, CSR_HPMCOUNTER26) -DECLARE_CSR(hpmcounter27, CSR_HPMCOUNTER27) -DECLARE_CSR(hpmcounter28, CSR_HPMCOUNTER28) -DECLARE_CSR(hpmcounter29, CSR_HPMCOUNTER29) -DECLARE_CSR(hpmcounter30, CSR_HPMCOUNTER30) 
-DECLARE_CSR(hpmcounter31, CSR_HPMCOUNTER31) -DECLARE_CSR(sstatus, CSR_SSTATUS) -DECLARE_CSR(sie, CSR_SIE) -DECLARE_CSR(stvec, CSR_STVEC) -DECLARE_CSR(scounteren, CSR_SCOUNTEREN) -DECLARE_CSR(sscratch, CSR_SSCRATCH) -DECLARE_CSR(sepc, CSR_SEPC) -DECLARE_CSR(scause, CSR_SCAUSE) -DECLARE_CSR(stval, CSR_STVAL) -DECLARE_CSR(sip, CSR_SIP) -DECLARE_CSR(satp, CSR_SATP) -DECLARE_CSR(mstatus, CSR_MSTATUS) -DECLARE_CSR(misa, CSR_MISA) -DECLARE_CSR(medeleg, CSR_MEDELEG) -DECLARE_CSR(mideleg, CSR_MIDELEG) -DECLARE_CSR(mie, CSR_MIE) -DECLARE_CSR(mtvec, CSR_MTVEC) -DECLARE_CSR(mcounteren, CSR_MCOUNTEREN) -DECLARE_CSR(mscratch, CSR_MSCRATCH) -DECLARE_CSR(mepc, CSR_MEPC) -DECLARE_CSR(mcause, CSR_MCAUSE) -DECLARE_CSR(mtval, CSR_MTVAL) -DECLARE_CSR(mip, CSR_MIP) -DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) -DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) -DECLARE_CSR(pmpcfg2, CSR_PMPCFG2) -DECLARE_CSR(pmpcfg3, CSR_PMPCFG3) -DECLARE_CSR(pmpaddr0, CSR_PMPADDR0) -DECLARE_CSR(pmpaddr1, CSR_PMPADDR1) -DECLARE_CSR(pmpaddr2, CSR_PMPADDR2) -DECLARE_CSR(pmpaddr3, CSR_PMPADDR3) -DECLARE_CSR(pmpaddr4, CSR_PMPADDR4) -DECLARE_CSR(pmpaddr5, CSR_PMPADDR5) -DECLARE_CSR(pmpaddr6, CSR_PMPADDR6) -DECLARE_CSR(pmpaddr7, CSR_PMPADDR7) -DECLARE_CSR(pmpaddr8, CSR_PMPADDR8) -DECLARE_CSR(pmpaddr9, CSR_PMPADDR9) -DECLARE_CSR(pmpaddr10, CSR_PMPADDR10) -DECLARE_CSR(pmpaddr11, CSR_PMPADDR11) -DECLARE_CSR(pmpaddr12, CSR_PMPADDR12) -DECLARE_CSR(pmpaddr13, CSR_PMPADDR13) -DECLARE_CSR(pmpaddr14, CSR_PMPADDR14) -DECLARE_CSR(pmpaddr15, CSR_PMPADDR15) -DECLARE_CSR(tselect, CSR_TSELECT) -DECLARE_CSR(tdata1, CSR_TDATA1) -DECLARE_CSR(tdata2, CSR_TDATA2) -DECLARE_CSR(tdata3, CSR_TDATA3) -DECLARE_CSR(dcsr, CSR_DCSR) -DECLARE_CSR(dpc, CSR_DPC) -DECLARE_CSR(dscratch, CSR_DSCRATCH) -DECLARE_CSR(mcycle, CSR_MCYCLE) -DECLARE_CSR(minstret, CSR_MINSTRET) -DECLARE_CSR(mhpmcounter3, CSR_MHPMCOUNTER3) -DECLARE_CSR(mhpmcounter4, CSR_MHPMCOUNTER4) -DECLARE_CSR(mhpmcounter5, CSR_MHPMCOUNTER5) -DECLARE_CSR(mhpmcounter6, CSR_MHPMCOUNTER6) 
-DECLARE_CSR(mhpmcounter7, CSR_MHPMCOUNTER7) -DECLARE_CSR(mhpmcounter8, CSR_MHPMCOUNTER8) -DECLARE_CSR(mhpmcounter9, CSR_MHPMCOUNTER9) -DECLARE_CSR(mhpmcounter10, CSR_MHPMCOUNTER10) -DECLARE_CSR(mhpmcounter11, CSR_MHPMCOUNTER11) -DECLARE_CSR(mhpmcounter12, CSR_MHPMCOUNTER12) -DECLARE_CSR(mhpmcounter13, CSR_MHPMCOUNTER13) -DECLARE_CSR(mhpmcounter14, CSR_MHPMCOUNTER14) -DECLARE_CSR(mhpmcounter15, CSR_MHPMCOUNTER15) -DECLARE_CSR(mhpmcounter16, CSR_MHPMCOUNTER16) -DECLARE_CSR(mhpmcounter17, CSR_MHPMCOUNTER17) -DECLARE_CSR(mhpmcounter18, CSR_MHPMCOUNTER18) -DECLARE_CSR(mhpmcounter19, CSR_MHPMCOUNTER19) -DECLARE_CSR(mhpmcounter20, CSR_MHPMCOUNTER20) -DECLARE_CSR(mhpmcounter21, CSR_MHPMCOUNTER21) -DECLARE_CSR(mhpmcounter22, CSR_MHPMCOUNTER22) -DECLARE_CSR(mhpmcounter23, CSR_MHPMCOUNTER23) -DECLARE_CSR(mhpmcounter24, CSR_MHPMCOUNTER24) -DECLARE_CSR(mhpmcounter25, CSR_MHPMCOUNTER25) -DECLARE_CSR(mhpmcounter26, CSR_MHPMCOUNTER26) -DECLARE_CSR(mhpmcounter27, CSR_MHPMCOUNTER27) -DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) -DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) -DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) -DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) -DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) -DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) -DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) -DECLARE_CSR(mhpmevent6, CSR_MHPMEVENT6) -DECLARE_CSR(mhpmevent7, CSR_MHPMEVENT7) -DECLARE_CSR(mhpmevent8, CSR_MHPMEVENT8) -DECLARE_CSR(mhpmevent9, CSR_MHPMEVENT9) -DECLARE_CSR(mhpmevent10, CSR_MHPMEVENT10) -DECLARE_CSR(mhpmevent11, CSR_MHPMEVENT11) -DECLARE_CSR(mhpmevent12, CSR_MHPMEVENT12) -DECLARE_CSR(mhpmevent13, CSR_MHPMEVENT13) -DECLARE_CSR(mhpmevent14, CSR_MHPMEVENT14) -DECLARE_CSR(mhpmevent15, CSR_MHPMEVENT15) -DECLARE_CSR(mhpmevent16, CSR_MHPMEVENT16) -DECLARE_CSR(mhpmevent17, CSR_MHPMEVENT17) -DECLARE_CSR(mhpmevent18, CSR_MHPMEVENT18) -DECLARE_CSR(mhpmevent19, CSR_MHPMEVENT19) -DECLARE_CSR(mhpmevent20, CSR_MHPMEVENT20) -DECLARE_CSR(mhpmevent21, CSR_MHPMEVENT21) -DECLARE_CSR(mhpmevent22, 
CSR_MHPMEVENT22) -DECLARE_CSR(mhpmevent23, CSR_MHPMEVENT23) -DECLARE_CSR(mhpmevent24, CSR_MHPMEVENT24) -DECLARE_CSR(mhpmevent25, CSR_MHPMEVENT25) -DECLARE_CSR(mhpmevent26, CSR_MHPMEVENT26) -DECLARE_CSR(mhpmevent27, CSR_MHPMEVENT27) -DECLARE_CSR(mhpmevent28, CSR_MHPMEVENT28) -DECLARE_CSR(mhpmevent29, CSR_MHPMEVENT29) -DECLARE_CSR(mhpmevent30, CSR_MHPMEVENT30) -DECLARE_CSR(mhpmevent31, CSR_MHPMEVENT31) -DECLARE_CSR(mvendorid, CSR_MVENDORID) -DECLARE_CSR(marchid, CSR_MARCHID) -DECLARE_CSR(mimpid, CSR_MIMPID) -DECLARE_CSR(mhartid, CSR_MHARTID) -DECLARE_CSR(cycleh, CSR_CYCLEH) -DECLARE_CSR(timeh, CSR_TIMEH) -DECLARE_CSR(instreth, CSR_INSTRETH) -DECLARE_CSR(hpmcounter3h, CSR_HPMCOUNTER3H) -DECLARE_CSR(hpmcounter4h, CSR_HPMCOUNTER4H) -DECLARE_CSR(hpmcounter5h, CSR_HPMCOUNTER5H) -DECLARE_CSR(hpmcounter6h, CSR_HPMCOUNTER6H) -DECLARE_CSR(hpmcounter7h, CSR_HPMCOUNTER7H) -DECLARE_CSR(hpmcounter8h, CSR_HPMCOUNTER8H) -DECLARE_CSR(hpmcounter9h, CSR_HPMCOUNTER9H) -DECLARE_CSR(hpmcounter10h, CSR_HPMCOUNTER10H) -DECLARE_CSR(hpmcounter11h, CSR_HPMCOUNTER11H) -DECLARE_CSR(hpmcounter12h, CSR_HPMCOUNTER12H) -DECLARE_CSR(hpmcounter13h, CSR_HPMCOUNTER13H) -DECLARE_CSR(hpmcounter14h, CSR_HPMCOUNTER14H) -DECLARE_CSR(hpmcounter15h, CSR_HPMCOUNTER15H) -DECLARE_CSR(hpmcounter16h, CSR_HPMCOUNTER16H) -DECLARE_CSR(hpmcounter17h, CSR_HPMCOUNTER17H) -DECLARE_CSR(hpmcounter18h, CSR_HPMCOUNTER18H) -DECLARE_CSR(hpmcounter19h, CSR_HPMCOUNTER19H) -DECLARE_CSR(hpmcounter20h, CSR_HPMCOUNTER20H) -DECLARE_CSR(hpmcounter21h, CSR_HPMCOUNTER21H) -DECLARE_CSR(hpmcounter22h, CSR_HPMCOUNTER22H) -DECLARE_CSR(hpmcounter23h, CSR_HPMCOUNTER23H) -DECLARE_CSR(hpmcounter24h, CSR_HPMCOUNTER24H) -DECLARE_CSR(hpmcounter25h, CSR_HPMCOUNTER25H) -DECLARE_CSR(hpmcounter26h, CSR_HPMCOUNTER26H) -DECLARE_CSR(hpmcounter27h, CSR_HPMCOUNTER27H) -DECLARE_CSR(hpmcounter28h, CSR_HPMCOUNTER28H) -DECLARE_CSR(hpmcounter29h, CSR_HPMCOUNTER29H) -DECLARE_CSR(hpmcounter30h, CSR_HPMCOUNTER30H) -DECLARE_CSR(hpmcounter31h, CSR_HPMCOUNTER31H) 
-DECLARE_CSR(mcycleh, CSR_MCYCLEH) -DECLARE_CSR(minstreth, CSR_MINSTRETH) -DECLARE_CSR(mhpmcounter3h, CSR_MHPMCOUNTER3H) -DECLARE_CSR(mhpmcounter4h, CSR_MHPMCOUNTER4H) -DECLARE_CSR(mhpmcounter5h, CSR_MHPMCOUNTER5H) -DECLARE_CSR(mhpmcounter6h, CSR_MHPMCOUNTER6H) -DECLARE_CSR(mhpmcounter7h, CSR_MHPMCOUNTER7H) -DECLARE_CSR(mhpmcounter8h, CSR_MHPMCOUNTER8H) -DECLARE_CSR(mhpmcounter9h, CSR_MHPMCOUNTER9H) -DECLARE_CSR(mhpmcounter10h, CSR_MHPMCOUNTER10H) -DECLARE_CSR(mhpmcounter11h, CSR_MHPMCOUNTER11H) -DECLARE_CSR(mhpmcounter12h, CSR_MHPMCOUNTER12H) -DECLARE_CSR(mhpmcounter13h, CSR_MHPMCOUNTER13H) -DECLARE_CSR(mhpmcounter14h, CSR_MHPMCOUNTER14H) -DECLARE_CSR(mhpmcounter15h, CSR_MHPMCOUNTER15H) -DECLARE_CSR(mhpmcounter16h, CSR_MHPMCOUNTER16H) -DECLARE_CSR(mhpmcounter17h, CSR_MHPMCOUNTER17H) -DECLARE_CSR(mhpmcounter18h, CSR_MHPMCOUNTER18H) -DECLARE_CSR(mhpmcounter19h, CSR_MHPMCOUNTER19H) -DECLARE_CSR(mhpmcounter20h, CSR_MHPMCOUNTER20H) -DECLARE_CSR(mhpmcounter21h, CSR_MHPMCOUNTER21H) -DECLARE_CSR(mhpmcounter22h, CSR_MHPMCOUNTER22H) -DECLARE_CSR(mhpmcounter23h, CSR_MHPMCOUNTER23H) -DECLARE_CSR(mhpmcounter24h, CSR_MHPMCOUNTER24H) -DECLARE_CSR(mhpmcounter25h, CSR_MHPMCOUNTER25H) -DECLARE_CSR(mhpmcounter26h, CSR_MHPMCOUNTER26H) -DECLARE_CSR(mhpmcounter27h, CSR_MHPMCOUNTER27H) -DECLARE_CSR(mhpmcounter28h, CSR_MHPMCOUNTER28H) -DECLARE_CSR(mhpmcounter29h, CSR_MHPMCOUNTER29H) -DECLARE_CSR(mhpmcounter30h, CSR_MHPMCOUNTER30H) -DECLARE_CSR(mhpmcounter31h, CSR_MHPMCOUNTER31H) -#endif -#ifdef DECLARE_CAUSE -DECLARE_CAUSE("misaligned fetch", CAUSE_MISALIGNED_FETCH) -DECLARE_CAUSE("fetch access", CAUSE_FETCH_ACCESS) -DECLARE_CAUSE("illegal instruction", CAUSE_ILLEGAL_INSTRUCTION) -DECLARE_CAUSE("breakpoint", CAUSE_BREAKPOINT) -DECLARE_CAUSE("misaligned load", CAUSE_MISALIGNED_LOAD) -DECLARE_CAUSE("load access", CAUSE_LOAD_ACCESS) -DECLARE_CAUSE("misaligned store", CAUSE_MISALIGNED_STORE) -DECLARE_CAUSE("store access", CAUSE_STORE_ACCESS) -DECLARE_CAUSE("user_ecall", CAUSE_USER_ECALL) 
-DECLARE_CAUSE("supervisor_ecall", CAUSE_SUPERVISOR_ECALL) -DECLARE_CAUSE("hypervisor_ecall", CAUSE_HYPERVISOR_ECALL) -DECLARE_CAUSE("machine_ecall", CAUSE_MACHINE_ECALL) -DECLARE_CAUSE("fetch page fault", CAUSE_FETCH_PAGE_FAULT) -DECLARE_CAUSE("load page fault", CAUSE_LOAD_PAGE_FAULT) -DECLARE_CAUSE("store page fault", CAUSE_STORE_PAGE_FAULT) -#endif diff --git a/riscv/encoding.h b/riscv/encoding.h new file mode 120000 index 0000000000..1075f15317 --- /dev/null +++ b/riscv/encoding.h @@ -0,0 +1 @@ +../../../software/runtime/encoding.h \ No newline at end of file diff --git a/riscv/execute.cc b/riscv/execute.cc index b56db9f27e..7313c512e9 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -2,65 +2,164 @@ #include "processor.h" #include "mmu.h" +#include "disasm.h" #include +#ifdef RISCV_ENABLE_COMMITLOG +static void commit_log_reset(processor_t* p) +{ + p->get_state()->log_reg_write.clear(); + p->get_state()->log_mem_read.clear(); + p->get_state()->log_mem_write.clear(); +} static void commit_log_stash_privilege(processor_t* p) { -#ifdef RISCV_ENABLE_COMMITLOG state_t* state = p->get_state(); state->last_inst_priv = state->prv; state->last_inst_xlen = p->get_xlen(); state->last_inst_flen = p->get_flen(); -#endif } -static void commit_log_print_value(int width, uint64_t hi, uint64_t lo) +static void commit_log_print_value(FILE *log_file, int width, const void *data) { + assert(log_file); + switch (width) { + case 8: + fprintf(log_file, "0x%01" PRIx8, *(const uint8_t *)data); + break; case 16: - fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo); + fprintf(log_file, "0x%04" PRIx16, *(const uint16_t *)data); break; case 32: - fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo); + fprintf(log_file, "0x%08" PRIx32, *(const uint32_t *)data); break; case 64: - fprintf(stderr, "0x%016" PRIx64, lo); - break; - case 128: - fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo); + fprintf(log_file, "0x%016" PRIx64, *(const uint64_t *)data); break; default: - abort(); + // max 
length of vector + if (((width - 1) & width) == 0) { + const uint64_t *arr = (const uint64_t *)data; + + fprintf(log_file, "0x"); + for (int idx = width / 64 - 1; idx >= 0; --idx) { + fprintf(log_file, "%016" PRIx64, arr[idx]); + } + } else { + abort(); + } + break; } } -static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn) +static void commit_log_print_value(FILE *log_file, int width, uint64_t val) { -#ifdef RISCV_ENABLE_COMMITLOG - auto& reg = state->log_reg_write; - int priv = state->last_inst_priv; - int xlen = state->last_inst_xlen; - int flen = state->last_inst_flen; - - fprintf(stderr, "%1d ", priv); - commit_log_print_value(xlen, 0, pc); - fprintf(stderr, " ("); - commit_log_print_value(insn.length() * 8, 0, insn.bits()); - - if (reg.addr) { - bool fp = reg.addr & 1; - int rd = reg.addr >> 1; - int size = fp ? flen : xlen; - fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd); - commit_log_print_value(size, reg.data.v[1], reg.data.v[0]); - fprintf(stderr, "\n"); - } else { - fprintf(stderr, ")\n"); + commit_log_print_value(log_file, width, &val); +} + +const char* processor_t::get_symbol(uint64_t addr) +{ + return sim->get_symbol(addr); +} + +static void commit_log_print_insn(processor_t *p, reg_t pc, insn_t insn) +{ + FILE *log_file = p->get_log_file(); + + auto& reg = p->get_state()->log_reg_write; + auto& load = p->get_state()->log_mem_read; + auto& store = p->get_state()->log_mem_write; + int priv = p->get_state()->last_inst_priv; + int xlen = p->get_state()->last_inst_xlen; + int flen = p->get_state()->last_inst_flen; + + // print core id on all lines so it is easy to grep + uint64_t id = p->get_csr(CSR_MHARTID); + fprintf(log_file, "core%4" PRId64 ": ", id); + + fprintf(log_file, "%1d ", priv); + commit_log_print_value(log_file, xlen, pc); + fprintf(log_file, " ("); + commit_log_print_value(log_file, insn.length() * 8, insn.bits()); + fprintf(log_file, ")"); + bool show_vec = false; + + for (auto item : reg) { + if (item.first == 0) +
continue; + + char prefix; + int size; + int rd = item.first >> 4; + bool is_vec = false; + bool is_vreg = false; + switch (item.first & 0xf) { + case 0: + size = xlen; + prefix = 'x'; + break; + case 1: + size = flen; + prefix = 'f'; + break; + case 2: + size = p->VU.VLEN; + prefix = 'v'; + is_vreg = true; + break; + case 3: + is_vec = true; + break; + case 4: + size = xlen; + prefix = 'c'; + break; + default: + assert("can't been here" && 0); + break; + } + + if (!show_vec && (is_vreg || is_vec)) { + fprintf(log_file, " e%ld %s%ld l%ld", + p->VU.vsew, + p->VU.vflmul < 1 ? "mf" : "m", + p->VU.vflmul < 1 ? (reg_t)(1 / p->VU.vflmul) : (reg_t)p->VU.vflmul, + p->VU.vl); + show_vec = true; + } + + if (!is_vec) { + if (prefix == 'c') + fprintf(log_file, " c%d_%s ", rd, csr_name(rd)); + else + fprintf(log_file, " %c%2d ", prefix, rd); + if (is_vreg) + commit_log_print_value(log_file, size, &p->VU.elt(rd, 0)); + else + commit_log_print_value(log_file, size, item.second.v); + } } - reg.addr = 0; -#endif + + for (auto item : load) { + fprintf(log_file, " mem "); + commit_log_print_value(log_file, xlen, std::get<0>(item)); + } + + for (auto item : store) { + fprintf(log_file, " mem "); + commit_log_print_value(log_file, xlen, std::get<0>(item)); + fprintf(log_file, " "); + commit_log_print_value(log_file, std::get<2>(item) << 3, std::get<1>(item)); + } + fprintf(log_file, "\n"); } +#else +static void commit_log_reset(processor_t* p) {} +static void commit_log_stash_privilege(processor_t* p) {} +static void commit_log_print_insn(processor_t* p, reg_t pc, insn_t insn) {} +#endif inline void processor_t::update_histogram(reg_t pc) { @@ -74,26 +173,55 @@ inline void processor_t::update_histogram(reg_t pc) // function calls. 
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch) { + commit_log_reset(p); commit_log_stash_privilege(p); - reg_t npc = fetch.func(p, fetch.insn, pc); - if (npc != PC_SERIALIZE_BEFORE) { - commit_log_print_insn(p->get_state(), pc, fetch.insn); - p->update_histogram(pc); + reg_t npc; + + try { + npc = fetch.func(p, fetch.insn, pc); + if (npc != PC_SERIALIZE_BEFORE) { + +#ifdef RISCV_ENABLE_COMMITLOG + if (p->get_log_commits_enabled()) { + commit_log_print_insn(p, pc, fetch.insn); + } +#endif + + } +#ifdef RISCV_ENABLE_COMMITLOG + } catch(mem_trap_t& t) { + //handle segfault in middle of vector load/store + if (p->get_log_commits_enabled()) { + for (auto item : p->get_state()->log_reg_write) { + if ((item.first & 3) == 3) { + commit_log_print_insn(p, pc, fetch.insn); + break; + } + } + } + throw; +#endif + } catch(...) { + throw; } + p->update_histogram(pc); + return npc; } bool processor_t::slow_path() { - return trace || debug || state.single_step != state.STEP_NONE || state.dcsr.cause; + return debug || state.single_step != state.STEP_NONE || state.debug_mode; } // fetch/decode/execute loop void processor_t::step(size_t n) { - if (state.dcsr.cause == DCSR_CAUSE_NONE) { - if (halt_request) { + if (!state.debug_mode) { + if (halt_request == HR_REGULAR) { enter_debug_mode(DCSR_CAUSE_DEBUGINT); + } else if (halt_request == HR_GROUP) { + enter_debug_mode(DCSR_CAUSE_GROUP); } // !!!The halt bit in DCSR is deprecated. else if (state.dcsr.halt) { enter_debug_mode(DCSR_CAUSE_HALT); @@ -130,7 +258,7 @@ void processor_t::step(size_t n) { if (unlikely(!state.serialized && state.single_step == state.STEP_STEPPED)) { state.single_step = state.STEP_NONE; - if (state.dcsr.cause == DCSR_CAUSE_NONE) { + if (!state.debug_mode) { enter_debug_mode(DCSR_CAUSE_STEP); // enter_debug_mode changed state.pc, so we can't just continue. 
break; @@ -142,18 +270,10 @@ void processor_t::step(size_t n) } insn_fetch_t fetch = mmu->load_insn(pc); - if ((trace || debug) && !state.serialized) + if (debug && !state.serialized) disasm(fetch.insn); pc = execute_insn(this, pc, fetch); - advance_pc(); - - if (unlikely(state.pc >= DEBUG_ROM_ENTRY && - state.pc < DEBUG_END)) { - // We're waiting for the debugger to tell us something. - return; - } - } } else while (instret < n) @@ -236,7 +356,7 @@ void processor_t::step(size_t n) enter_debug_mode(DCSR_CAUSE_HWBP); break; case ACTION_DEBUG_EXCEPTION: { - mem_trap_t trap(CAUSE_BREAKPOINT, t.address); + insn_trap_t trap(CAUSE_BREAKPOINT, t.address); take_trap(trap, pc); break; } @@ -244,6 +364,16 @@ void processor_t::step(size_t n) abort(); } } + catch (wait_for_interrupt_t &t) + { + // Return to the outer simulation loop, which gives other devices/harts a + // chance to generate interrupts. + // + // In the debug ROM this prevents us from wasting time looping, but also + // allows us to switch to other threads only once per idle loop in case + // there is activity. 
+ n = instret; + } state.minstret += instret; n -= instret; diff --git a/riscv/extension.h b/riscv/extension.h index 419b172f29..d1e847d9b8 100644 --- a/riscv/extension.h +++ b/riscv/extension.h @@ -16,7 +16,6 @@ class extension_t virtual const char* name() = 0; virtual void reset() {}; virtual void set_debug(bool value) {}; - virtual void set_trace(bool value) {}; virtual ~extension_t(); void set_processor(processor_t* _p) { p = _p; } diff --git a/riscv/extensions.cc b/riscv/extensions.cc index d1690c4b8f..347dc5e915 100644 --- a/riscv/extensions.cc +++ b/riscv/extensions.cc @@ -21,14 +21,23 @@ std::function find_extension(const char* name) if (!extensions().count(name)) { // try to find extension xyz by loading libxyz.so std::string libname = std::string("lib") + name + ".so"; - if (!dlopen(libname.c_str(), RTLD_LAZY)) { - fprintf(stderr, "couldn't find extension '%s' (or library '%s')\n", - name, libname.c_str()); - exit(-1); + std::string libdefault = "libcustomext.so"; + bool is_default = false; + auto dlh = dlopen(libname.c_str(), RTLD_LAZY); + if (!dlh) { + dlh = dlopen(libdefault.c_str(), RTLD_LAZY); + if (!dlh) { + fprintf(stderr, "couldn't find shared library either '%s' or '%s')\n", + libname.c_str(), libdefault.c_str()); + exit(-1); + } + + is_default = true; } + if (!extensions().count(name)) { fprintf(stderr, "couldn't find extension '%s' in shared library '%s'\n", - name, libname.c_str()); + name, is_default ? libdefault.c_str() : libname.c_str()); exit(-1); } } diff --git a/riscv/gen_icache b/riscv/gen_icache index 7ec3c69434..67c0d69f1a 100755 --- a/riscv/gen_icache +++ b/riscv/gen_icache @@ -1,7 +1,8 @@ #!/bin/sh -n=$(($1-1)) -for i in `seq 0 $n` +i=0 +while [ $i -lt $1 ] do echo case $i: ICACHE_ACCESS\($i\)\; + i=$((i+1)) done echo diff --git a/riscv/insn_template.h b/riscv/insn_template.h index 07aa16ba05..3c36d10e52 100644 --- a/riscv/insn_template.h +++ b/riscv/insn_template.h @@ -1,7 +1,7 @@ // See LICENSE for license details. 
+#include "arith.h" #include "mmu.h" -#include "mulhi.h" #include "softfloat.h" #include "internals.h" #include "specialize.h" diff --git a/riscv/insns/c_ebreak.h b/riscv/insns/c_ebreak.h index 128b86b22c..1c36b2418b 100644 --- a/riscv/insns/c_ebreak.h +++ b/riscv/insns/c_ebreak.h @@ -1,2 +1,2 @@ require_extension('C'); -throw trap_breakpoint(pc); +throw trap_breakpoint(0); diff --git a/riscv/insns/csrrc.h b/riscv/insns/csrrc.h index 0472d80efd..37384b0e52 100644 --- a/riscv/insns/csrrc.h +++ b/riscv/insns/csrrc.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old & ~RS1); } diff --git a/riscv/insns/csrrci.h b/riscv/insns/csrrci.h index 4d83cc0617..ad40c8f4c2 100644 --- a/riscv/insns/csrrci.h +++ b/riscv/insns/csrrci.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old & ~(reg_t)insn.rs1()); } diff --git a/riscv/insns/csrrs.h b/riscv/insns/csrrs.h index 4e8bde9637..91fcc7a347 100644 --- a/riscv/insns/csrrs.h +++ b/riscv/insns/csrrs.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old | RS1); } diff --git a/riscv/insns/csrrsi.h b/riscv/insns/csrrsi.h index b673725b54..f348e570bd 100644 --- a/riscv/insns/csrrsi.h +++ b/riscv/insns/csrrsi.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old | insn.rs1()); } diff --git a/riscv/insns/csrrw.h b/riscv/insns/csrrw.h index e45420b570..cc0c28dc95 100644 --- a/riscv/insns/csrrw.h +++ b/riscv/insns/csrrw.h @@ -1,5 +1,5 @@ int csr = validate_csr(insn.csr(), 
true); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, true); p->set_csr(csr, RS1); WRITE_RD(sext_xlen(old)); serialize(); diff --git a/riscv/insns/csrrwi.h b/riscv/insns/csrrwi.h index decadf4121..4d5d06468b 100644 --- a/riscv/insns/csrrwi.h +++ b/riscv/insns/csrrwi.h @@ -1,5 +1,5 @@ int csr = validate_csr(insn.csr(), true); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, true); p->set_csr(csr, insn.rs1()); WRITE_RD(sext_xlen(old)); serialize(); diff --git a/riscv/insns/dret.h b/riscv/insns/dret.h index 35c19cb8a2..ba503a0c7f 100644 --- a/riscv/insns/dret.h +++ b/riscv/insns/dret.h @@ -1,9 +1,9 @@ -require_privilege(PRV_M); +require(STATE.debug_mode); set_pc_and_serialize(STATE.dpc); p->set_privilege(STATE.dcsr.prv); /* We're not in Debug Mode anymore. */ -STATE.dcsr.cause = 0; +STATE.debug_mode = false; if (STATE.dcsr.step) STATE.single_step = STATE.STEP_STEPPING; diff --git a/riscv/insns/ebreak.h b/riscv/insns/ebreak.h index 736cebef4b..f123f9544d 100644 --- a/riscv/insns/ebreak.h +++ b/riscv/insns/ebreak.h @@ -1 +1 @@ -throw trap_breakpoint(pc); +throw trap_breakpoint(0); diff --git a/riscv/insns/ecall.h b/riscv/insns/ecall.h index e298ac722b..e6c723f4e3 100644 --- a/riscv/insns/ecall.h +++ b/riscv/insns/ecall.h @@ -1,7 +1,11 @@ switch (STATE.prv) { case PRV_U: throw trap_user_ecall(); - case PRV_S: throw trap_supervisor_ecall(); + case PRV_S: + if (STATE.v) + throw trap_virtual_supervisor_ecall(); + else + throw trap_supervisor_ecall(); case PRV_M: throw trap_machine_ecall(); default: abort(); } diff --git a/riscv/insns/fadd_h.h b/riscv/insns/fadd_h.h new file mode 100644 index 0000000000..2b646ae77b --- /dev/null +++ b/riscv/insns/fadd_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_add(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fclass_h.h b/riscv/insns/fclass_h.h new file mode 100644 index 0000000000..066a2d24d6 --- /dev/null +++ 
b/riscv/insns/fclass_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_classify(f16(FRS1))); diff --git a/riscv/insns/fcvt_d_h.h b/riscv/insns/fcvt_d_h.h new file mode 100644 index 0000000000..6906fc06c1 --- /dev/null +++ b/riscv/insns/fcvt_d_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('D'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f64(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_d.h b/riscv/insns/fcvt_h_d.h new file mode 100644 index 0000000000..f463dd58e6 --- /dev/null +++ b/riscv/insns/fcvt_h_d.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('D'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f64_to_f16(f64(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_l.h b/riscv/insns/fcvt_h_l.h new file mode 100644 index 0000000000..39178c2fd3 --- /dev/null +++ b/riscv/insns/fcvt_h_l.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(i64_to_f16(RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_lu.h b/riscv/insns/fcvt_h_lu.h new file mode 100644 index 0000000000..a872c48091 --- /dev/null +++ b/riscv/insns/fcvt_h_lu.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(ui64_to_f16(RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_q.h b/riscv/insns/fcvt_h_q.h new file mode 100644 index 0000000000..94b0001635 --- /dev/null +++ b/riscv/insns/fcvt_h_q.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('Q'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f128_to_f16(f128(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_s.h b/riscv/insns/fcvt_h_s.h new file mode 100644 index 0000000000..eb928e9785 --- /dev/null +++ b/riscv/insns/fcvt_h_s.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f32_to_f16(f32(FRS1))); +set_fp_exceptions; 
diff --git a/riscv/insns/fcvt_h_w.h b/riscv/insns/fcvt_h_w.h new file mode 100644 index 0000000000..c08245451f --- /dev/null +++ b/riscv/insns/fcvt_h_w.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(i32_to_f16((int32_t)RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_wu.h b/riscv/insns/fcvt_h_wu.h new file mode 100644 index 0000000000..9f2f5f6a7c --- /dev/null +++ b/riscv/insns/fcvt_h_wu.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(ui32_to_f16((uint32_t)RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_l_h.h b/riscv/insns/fcvt_l_h.h new file mode 100644 index 0000000000..5a1fea850d --- /dev/null +++ b/riscv/insns/fcvt_l_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(f16_to_i64(f16(FRS1), RM, true)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_lu_h.h b/riscv/insns/fcvt_lu_h.h new file mode 100644 index 0000000000..f1454c3e99 --- /dev/null +++ b/riscv/insns/fcvt_lu_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(f16_to_ui64(f16(FRS1), RM, true)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_q_h.h b/riscv/insns/fcvt_q_h.h new file mode 100644 index 0000000000..8a5f6805c5 --- /dev/null +++ b/riscv/insns/fcvt_q_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('Q'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f128(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_s_h.h b/riscv/insns/fcvt_s_h.h new file mode 100644 index 0000000000..bfa2e91497 --- /dev/null +++ b/riscv/insns/fcvt_s_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f32(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_w_h.h b/riscv/insns/fcvt_w_h.h new file mode 100644 index 0000000000..fe8bb48fb2 --- /dev/null +++ 
b/riscv/insns/fcvt_w_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(sext32(f16_to_i32(f16(FRS1), RM, true))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_wu_h.h b/riscv/insns/fcvt_wu_h.h new file mode 100644 index 0000000000..bf6648d3c6 --- /dev/null +++ b/riscv/insns/fcvt_wu_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(sext32(f16_to_ui32(f16(FRS1), RM, true))); +set_fp_exceptions; diff --git a/riscv/insns/fdiv_h.h b/riscv/insns/fdiv_h.h new file mode 100644 index 0000000000..a169eae83a --- /dev/null +++ b/riscv/insns/fdiv_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_div(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/feq_h.h b/riscv/insns/feq_h.h new file mode 100644 index 0000000000..47e75a5b92 --- /dev/null +++ b/riscv/insns/feq_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_eq(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fle_h.h b/riscv/insns/fle_h.h new file mode 100644 index 0000000000..9fc5968532 --- /dev/null +++ b/riscv/insns/fle_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_le(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/flh.h b/riscv/insns/flh.h new file mode 100644 index 0000000000..c887999398 --- /dev/null +++ b/riscv/insns/flh.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16(MMU.load_uint16(RS1 + insn.i_imm()))); diff --git a/riscv/insns/flt_h.h b/riscv/insns/flt_h.h new file mode 100644 index 0000000000..f516a38a62 --- /dev/null +++ b/riscv/insns/flt_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_lt(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmadd_h.h b/riscv/insns/fmadd_h.h new file mode 100644 index 0000000000..6551de5e30 --- /dev/null +++ b/riscv/insns/fmadd_h.h @@ 
-0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(FRS1), f16(FRS2), f16(FRS3))); +set_fp_exceptions; diff --git a/riscv/insns/fmax_h.h b/riscv/insns/fmax_h.h new file mode 100644 index 0000000000..3d4c40ebf9 --- /dev/null +++ b/riscv/insns/fmax_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16_max(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmin_h.h b/riscv/insns/fmin_h.h new file mode 100644 index 0000000000..5fb1404fe0 --- /dev/null +++ b/riscv/insns/fmin_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16_min(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmsub_h.h b/riscv/insns/fmsub_h.h new file mode 100644 index 0000000000..934291fc81 --- /dev/null +++ b/riscv/insns/fmsub_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(FRS1), f16(FRS2), f16(f16(FRS3).v ^ F16_SIGN))); +set_fp_exceptions; diff --git a/riscv/insns/fmul_h.h b/riscv/insns/fmul_h.h new file mode 100644 index 0000000000..0152df8f09 --- /dev/null +++ b/riscv/insns/fmul_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mul(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmv_h_x.h b/riscv/insns/fmv_h_x.h new file mode 100644 index 0000000000..c022508e24 --- /dev/null +++ b/riscv/insns/fmv_h_x.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16(RS1)); diff --git a/riscv/insns/fmv_x_h.h b/riscv/insns/fmv_x_h.h new file mode 100644 index 0000000000..5e89c4f0f0 --- /dev/null +++ b/riscv/insns/fmv_x_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(sext32((int16_t)(FRS1.v[0]))); diff --git a/riscv/insns/fnmadd_h.h b/riscv/insns/fnmadd_h.h new file mode 100644 index 0000000000..e4c619e77a --- /dev/null +++ b/riscv/insns/fnmadd_h.h @@ -0,0 +1,5 @@ 
+require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(f16(FRS1).v ^ F16_SIGN), f16(FRS2), f16(f16(FRS3).v ^ F16_SIGN))); +set_fp_exceptions; diff --git a/riscv/insns/fnmsub_h.h b/riscv/insns/fnmsub_h.h new file mode 100644 index 0000000000..0410c3bba6 --- /dev/null +++ b/riscv/insns/fnmsub_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(f16(FRS1).v ^ F16_SIGN), f16(FRS2), f16(FRS3))); +set_fp_exceptions; diff --git a/riscv/insns/fsgnj_h.h b/riscv/insns/fsgnj_h.h new file mode 100644 index 0000000000..79d50f5fa3 --- /dev/null +++ b/riscv/insns/fsgnj_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, false, false)); diff --git a/riscv/insns/fsgnjn_h.h b/riscv/insns/fsgnjn_h.h new file mode 100644 index 0000000000..ebb4ac9f50 --- /dev/null +++ b/riscv/insns/fsgnjn_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, true, false)); diff --git a/riscv/insns/fsgnjx_h.h b/riscv/insns/fsgnjx_h.h new file mode 100644 index 0000000000..9310269545 --- /dev/null +++ b/riscv/insns/fsgnjx_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, false, true)); diff --git a/riscv/insns/fsh.h b/riscv/insns/fsh.h new file mode 100644 index 0000000000..b9fa4e0557 --- /dev/null +++ b/riscv/insns/fsh.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +MMU.store_uint16(RS1 + insn.s_imm(), FRS2.v[0]); diff --git a/riscv/insns/fsqrt_h.h b/riscv/insns/fsqrt_h.h new file mode 100644 index 0000000000..138d572744 --- /dev/null +++ b/riscv/insns/fsqrt_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_sqrt(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fsub_h.h b/riscv/insns/fsub_h.h new file mode 100644 index 0000000000..43b51cc2eb --- /dev/null +++ b/riscv/insns/fsub_h.h @@ -0,0 +1,5 @@ 
+require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_sub(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/hfence_gvma.h b/riscv/insns/hfence_gvma.h new file mode 100644 index 0000000000..f1996d921f --- /dev/null +++ b/riscv/insns/hfence_gvma.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +MMU.flush_tlb(); diff --git a/riscv/insns/hfence_vvma.h b/riscv/insns/hfence_vvma.h new file mode 100644 index 0000000000..ecd42c198c --- /dev/null +++ b/riscv/insns/hfence_vvma.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(PRV_S); +MMU.flush_tlb(); diff --git a/riscv/insns/hlv_b.h b/riscv/insns/hlv_b.h new file mode 100644 index 0000000000..86192c6328 --- /dev/null +++ b/riscv/insns/hlv_b.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int8(RS1)); diff --git a/riscv/insns/hlv_bu.h b/riscv/insns/hlv_bu.h new file mode 100644 index 0000000000..2f951947d0 --- /dev/null +++ b/riscv/insns/hlv_bu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint8(RS1)); diff --git a/riscv/insns/hlv_d.h b/riscv/insns/hlv_d.h new file mode 100644 index 0000000000..1bbd0277b4 --- /dev/null +++ b/riscv/insns/hlv_d.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int64(RS1)); diff --git a/riscv/insns/hlv_h.h b/riscv/insns/hlv_h.h new file mode 100644 index 0000000000..6825fe46bd --- /dev/null +++ b/riscv/insns/hlv_h.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? 
PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int16(RS1)); diff --git a/riscv/insns/hlv_hu.h b/riscv/insns/hlv_hu.h new file mode 100644 index 0000000000..3d9d98e2f4 --- /dev/null +++ b/riscv/insns/hlv_hu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint16(RS1)); diff --git a/riscv/insns/hlv_w.h b/riscv/insns/hlv_w.h new file mode 100644 index 0000000000..be420d37c5 --- /dev/null +++ b/riscv/insns/hlv_w.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int32(RS1)); diff --git a/riscv/insns/hlv_wu.h b/riscv/insns/hlv_wu.h new file mode 100644 index 0000000000..851be27c6a --- /dev/null +++ b/riscv/insns/hlv_wu.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint32(RS1)); diff --git a/riscv/insns/hlvx_hu.h b/riscv/insns/hlvx_hu.h new file mode 100644 index 0000000000..19dbcfbf63 --- /dev/null +++ b/riscv/insns/hlvx_hu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_x_uint16(RS1)); diff --git a/riscv/insns/hlvx_wu.h b/riscv/insns/hlvx_wu.h new file mode 100644 index 0000000000..4dfe702350 --- /dev/null +++ b/riscv/insns/hlvx_wu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_x_uint32(RS1)); diff --git a/riscv/insns/hsv_b.h b/riscv/insns/hsv_b.h new file mode 100644 index 0000000000..a5c34ff072 --- /dev/null +++ b/riscv/insns/hsv_b.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? 
PRV_U : PRV_S); +MMU.guest_store_uint8(RS1, RS2); diff --git a/riscv/insns/hsv_d.h b/riscv/insns/hsv_d.h new file mode 100644 index 0000000000..14c6d5d913 --- /dev/null +++ b/riscv/insns/hsv_d.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint64(RS1, RS2); diff --git a/riscv/insns/hsv_h.h b/riscv/insns/hsv_h.h new file mode 100644 index 0000000000..1cfe77aae8 --- /dev/null +++ b/riscv/insns/hsv_h.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint16(RS1, RS2); diff --git a/riscv/insns/hsv_w.h b/riscv/insns/hsv_w.h new file mode 100644 index 0000000000..d54f6731ee --- /dev/null +++ b/riscv/insns/hsv_w.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint32(RS1, RS2); diff --git a/riscv/insns/lr_d.h b/riscv/insns/lr_d.h index 52090c31b8..3f3521be54 100644 --- a/riscv/insns/lr_d.h +++ b/riscv/insns/lr_d.h @@ -1,4 +1,5 @@ require_extension('A'); require_rv64; +auto res = MMU.load_int64(RS1); MMU.acquire_load_reservation(RS1); -WRITE_RD(MMU.load_int64(RS1)); +WRITE_RD(res); diff --git a/riscv/insns/lr_w.h b/riscv/insns/lr_w.h index c5845a68e1..8605cc5df9 100644 --- a/riscv/insns/lr_w.h +++ b/riscv/insns/lr_w.h @@ -1,3 +1,4 @@ require_extension('A'); +auto res = MMU.load_int32(RS1); MMU.acquire_load_reservation(RS1); -WRITE_RD(MMU.load_int32(RS1)); +WRITE_RD(res); diff --git a/riscv/insns/mret.h b/riscv/insns/mret.h index 96933cf672..cedfc72840 100644 --- a/riscv/insns/mret.h +++ b/riscv/insns/mret.h @@ -2,8 +2,10 @@ require_privilege(PRV_M); set_pc_and_serialize(p->get_state()->mepc); reg_t s = STATE.mstatus; reg_t prev_prv = get_field(s, MSTATUS_MPP); +reg_t prev_virt = get_field(s, MSTATUS_MPV); s = set_field(s, MSTATUS_MIE, get_field(s, 
MSTATUS_MPIE)); s = set_field(s, MSTATUS_MPIE, 1); s = set_field(s, MSTATUS_MPP, PRV_U); -p->set_privilege(prev_prv); p->set_csr(CSR_MSTATUS, s); +p->set_privilege(prev_prv); +p->set_virt(prev_virt); diff --git a/riscv/insns/p_abs.h b/riscv/insns/p_abs.h new file mode 100644 index 0000000000..409446b218 --- /dev/null +++ b/riscv/insns/p_abs.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) > 0) + WRITE_RD(RS1); +else + WRITE_RD(-RS1); diff --git a/riscv/insns/p_beqimm.h b/riscv/insns/p_beqimm.h new file mode 100644 index 0000000000..5d63493212 --- /dev/null +++ b/riscv/insns/p_beqimm.h @@ -0,0 +1,2 @@ +if(sreg_t(RS1) == insn.p_simm5()) + set_pc(BRANCH_TARGET); diff --git a/riscv/insns/p_bneimm.h b/riscv/insns/p_bneimm.h new file mode 100644 index 0000000000..ef565ccfd4 --- /dev/null +++ b/riscv/insns/p_bneimm.h @@ -0,0 +1,2 @@ +if(sreg_t(RS1) != insn.p_simm5()) + set_pc(BRANCH_TARGET); diff --git a/riscv/insns/p_clip.h b/riscv/insns/p_clip.h new file mode 100644 index 0000000000..4cc255f00d --- /dev/null +++ b/riscv/insns/p_clip.h @@ -0,0 +1,9 @@ +sreg_t clip_lower = insn.p_zimm5() ? -(1 << (insn.p_zimm5() - 1)) : -1; +sreg_t clip_upper = insn.p_zimm5() ? ((1 << (insn.p_zimm5() - 1)) - 1) : 0; + +if(sreg_t(RS1) <= clip_lower) + WRITE_RD(clip_lower); +else if(sreg_t(RS1) >= clip_upper) + WRITE_RD(clip_upper); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipr.h b/riscv/insns/p_clipr.h new file mode 100644 index 0000000000..abe0846e57 --- /dev/null +++ b/riscv/insns/p_clipr.h @@ -0,0 +1,6 @@ +if(sreg_t(RS1) <= -(sreg_t(RS2) + 1)) + WRITE_RD(-(sreg_t(RS2) + 1)); +else if(sreg_t(RS1) >= sreg_t(RS2)) + WRITE_RD(sreg_t(RS2)); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipu.h b/riscv/insns/p_clipu.h new file mode 100644 index 0000000000..aa4da5858e --- /dev/null +++ b/riscv/insns/p_clipu.h @@ -0,0 +1,8 @@ +sreg_t clipu_upper = insn.p_zimm5() ? 
((1 << (insn.p_zimm5() - 1)) - 1) : 0; + +if(sreg_t(RS1) <= 0) + WRITE_RD(0); +else if(sreg_t(RS1) >= clipu_upper) + WRITE_RD(clipu_upper); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipur.h b/riscv/insns/p_clipur.h new file mode 100644 index 0000000000..08c03703a4 --- /dev/null +++ b/riscv/insns/p_clipur.h @@ -0,0 +1,6 @@ +if(sreg_t(RS1) <= 0) + WRITE_RD(0); +else if(sreg_t(RS1) >= sreg_t(RS2)) + WRITE_RD(sreg_t(RS2)); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_extbs.h b/riscv/insns/p_extbs.h new file mode 100644 index 0000000000..de3e100e89 --- /dev/null +++ b/riscv/insns/p_extbs.h @@ -0,0 +1 @@ +WRITE_RD(sext8(RS1)); diff --git a/riscv/insns/p_extbz.h b/riscv/insns/p_extbz.h new file mode 100644 index 0000000000..419622cbaf --- /dev/null +++ b/riscv/insns/p_extbz.h @@ -0,0 +1 @@ +WRITE_RD(zext8(RS1)); diff --git a/riscv/insns/p_exths.h b/riscv/insns/p_exths.h new file mode 100644 index 0000000000..16cbe7b5a7 --- /dev/null +++ b/riscv/insns/p_exths.h @@ -0,0 +1 @@ +WRITE_RD(sext16(RS1)); diff --git a/riscv/insns/p_exthz.h b/riscv/insns/p_exthz.h new file mode 100644 index 0000000000..8b548b6ffa --- /dev/null +++ b/riscv/insns/p_exthz.h @@ -0,0 +1 @@ +WRITE_RD(zext16(RS1)); diff --git a/riscv/insns/p_lb_irpost.h b/riscv/insns/p_lb_irpost.h new file mode 100644 index 0000000000..ed17db1627 --- /dev/null +++ b/riscv/insns/p_lb_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/riscv/insns/p_lb_rr.h b/riscv/insns/p_lb_rr.h new file mode 100644 index 0000000000..c32237fe12 --- /dev/null +++ b/riscv/insns/p_lb_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int8(RS1 + sreg_t(RS2))); diff --git a/riscv/insns/p_lb_rrpost.h b/riscv/insns/p_lb_rrpost.h new file mode 100644 index 0000000000..9dc2bd93da --- /dev/null +++ b/riscv/insns/p_lb_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int8(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/riscv/insns/p_lbu_irpost.h b/riscv/insns/p_lbu_irpost.h new file 
mode 100644 index 0000000000..0f015c3766 --- /dev/null +++ b/riscv/insns/p_lbu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/riscv/insns/p_lbu_rr.h b/riscv/insns/p_lbu_rr.h new file mode 100644 index 0000000000..a95ca2a9a9 --- /dev/null +++ b/riscv/insns/p_lbu_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_uint8(RS1 + sreg_t(RS2))); diff --git a/riscv/insns/p_lbu_rrpost.h b/riscv/insns/p_lbu_rrpost.h new file mode 100644 index 0000000000..3456c8aecb --- /dev/null +++ b/riscv/insns/p_lbu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint8(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/riscv/insns/p_lh_irpost.h b/riscv/insns/p_lh_irpost.h new file mode 100644 index 0000000000..3fea47c188 --- /dev/null +++ b/riscv/insns/p_lh_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/riscv/insns/p_lh_rr.h b/riscv/insns/p_lh_rr.h new file mode 100644 index 0000000000..cd5bf82194 --- /dev/null +++ b/riscv/insns/p_lh_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int16(RS1 + sreg_t(RS2))); diff --git a/riscv/insns/p_lh_rrpost.h b/riscv/insns/p_lh_rrpost.h new file mode 100644 index 0000000000..60353fd3e0 --- /dev/null +++ b/riscv/insns/p_lh_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int16(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/riscv/insns/p_lhu_irpost.h b/riscv/insns/p_lhu_irpost.h new file mode 100644 index 0000000000..8e7cfb6beb --- /dev/null +++ b/riscv/insns/p_lhu_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/riscv/insns/p_lhu_rr.h b/riscv/insns/p_lhu_rr.h new file mode 100644 index 0000000000..6568736a78 --- /dev/null +++ b/riscv/insns/p_lhu_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_uint16(RS1 + sreg_t(RS2))); diff --git a/riscv/insns/p_lhu_rrpost.h b/riscv/insns/p_lhu_rrpost.h new file mode 100644 index 0000000000..195222ac04 --- /dev/null +++ b/riscv/insns/p_lhu_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_uint16(RS1)); 
+WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/riscv/insns/p_lw_irpost.h b/riscv/insns/p_lw_irpost.h new file mode 100644 index 0000000000..fb77d87236 --- /dev/null +++ b/riscv/insns/p_lw_irpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + insn.i_imm()); diff --git a/riscv/insns/p_lw_rr.h b/riscv/insns/p_lw_rr.h new file mode 100644 index 0000000000..78fa33231b --- /dev/null +++ b/riscv/insns/p_lw_rr.h @@ -0,0 +1 @@ +WRITE_RD(MMU.load_int32(RS1 + sreg_t(RS2))); diff --git a/riscv/insns/p_lw_rrpost.h b/riscv/insns/p_lw_rrpost.h new file mode 100644 index 0000000000..e315c5dfe1 --- /dev/null +++ b/riscv/insns/p_lw_rrpost.h @@ -0,0 +1,2 @@ +WRITE_RD(MMU.load_int32(RS1)); +WRITE_RS1(RS1 + sreg_t(RS2)); diff --git a/riscv/insns/p_mac.h b/riscv/insns/p_mac.h new file mode 100644 index 0000000000..bf5c77a149 --- /dev/null +++ b/riscv/insns/p_mac.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) + sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/riscv/insns/p_max.h b/riscv/insns/p_max.h new file mode 100644 index 0000000000..7a7ddcd8e9 --- /dev/null +++ b/riscv/insns/p_max.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) > sreg_t(RS2)) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_maxu.h b/riscv/insns/p_maxu.h new file mode 100644 index 0000000000..8cba177c7b --- /dev/null +++ b/riscv/insns/p_maxu.h @@ -0,0 +1,4 @@ +if(RS1 > RS2) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_min.h b/riscv/insns/p_min.h new file mode 100644 index 0000000000..1fc66807ed --- /dev/null +++ b/riscv/insns/p_min.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) <= sreg_t(RS2)) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_minu.h b/riscv/insns/p_minu.h new file mode 100644 index 0000000000..42339df5e6 --- /dev/null +++ b/riscv/insns/p_minu.h @@ -0,0 +1,4 @@ +if(RS1 <= RS2) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_msu.h b/riscv/insns/p_msu.h new file mode 100644 index 0000000000..2a42cf05e0 --- /dev/null +++ 
b/riscv/insns/p_msu.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) - sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/riscv/insns/p_sb_irpost.h b/riscv/insns/p_sb_irpost.h new file mode 100644 index 0000000000..9339bc9cac --- /dev/null +++ b/riscv/insns/p_sb_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sb_rr.h b/riscv/insns/p_sb_rr.h new file mode 100644 index 0000000000..73e49727c4 --- /dev/null +++ b/riscv/insns/p_sb_rr.h @@ -0,0 +1 @@ +MMU.store_uint8(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sb_rrpost.h b/riscv/insns/p_sb_rrpost.h new file mode 100644 index 0000000000..0442551744 --- /dev/null +++ b/riscv/insns/p_sb_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/p_sh_irpost.h b/riscv/insns/p_sh_irpost.h new file mode 100644 index 0000000000..f915c518dc --- /dev/null +++ b/riscv/insns/p_sh_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sh_rr.h b/riscv/insns/p_sh_rr.h new file mode 100644 index 0000000000..f3270bd561 --- /dev/null +++ b/riscv/insns/p_sh_rr.h @@ -0,0 +1 @@ +MMU.store_uint16(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sh_rrpost.h b/riscv/insns/p_sh_rrpost.h new file mode 100644 index 0000000000..5043c62876 --- /dev/null +++ b/riscv/insns/p_sh_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/p_slet.h b/riscv/insns/p_slet.h new file mode 100644 index 0000000000..82f7cc2db1 --- /dev/null +++ b/riscv/insns/p_slet.h @@ -0,0 +1 @@ +WRITE_RD(sreg_t(RS1) <= sreg_t(RS2)); diff --git a/riscv/insns/p_sletu.h b/riscv/insns/p_sletu.h new file mode 100644 index 0000000000..12547170bb --- /dev/null +++ b/riscv/insns/p_sletu.h @@ -0,0 +1 @@ +WRITE_RD(RS1 <= RS2); diff --git a/riscv/insns/p_sw_irpost.h b/riscv/insns/p_sw_irpost.h new file mode 100644 index 0000000000..7ff0406fea --- 
/dev/null +++ b/riscv/insns/p_sw_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sw_rr.h b/riscv/insns/p_sw_rr.h new file mode 100644 index 0000000000..6bef97f73d --- /dev/null +++ b/riscv/insns/p_sw_rr.h @@ -0,0 +1 @@ +MMU.store_uint32(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sw_rrpost.h b/riscv/insns/p_sw_rrpost.h new file mode 100644 index 0000000000..6382d6d801 --- /dev/null +++ b/riscv/insns/p_sw_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/pv_abs_b.h b/riscv/insns/pv_abs_b.h new file mode 100644 index 0000000000..c0bc089cc1 --- /dev/null +++ b/riscv/insns/pv_abs_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > 0 ? RS1_B(i) : -sext8(RS1_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_abs_h.h b/riscv/insns/pv_abs_h.h new file mode 100644 index 0000000000..42ca4ff3ca --- /dev/null +++ b/riscv/insns/pv_abs_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > 0 ? 
RS1_H(i) : -sext16(RS1_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_b.h b/riscv/insns/pv_add_b.h new file mode 100644 index 0000000000..ecae63a049 --- /dev/null +++ b/riscv/insns/pv_add_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_h.h b/riscv/insns/pv_add_h.h new file mode 100644 index 0000000000..0a78665af0 --- /dev/null +++ b/riscv/insns/pv_add_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sc_b.h b/riscv/insns/pv_add_sc_b.h new file mode 100644 index 0000000000..572b61c07f --- /dev/null +++ b/riscv/insns/pv_add_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sc_h.h b/riscv/insns/pv_add_sc_h.h new file mode 100644 index 0000000000..734a911b8f --- /dev/null +++ b/riscv/insns/pv_add_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sci_b.h b/riscv/insns/pv_add_sci_b.h new file mode 100644 index 0000000000..df47f1cb50 --- /dev/null +++ b/riscv/insns/pv_add_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + insn.p_simm6(); + 
simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sci_h.h b/riscv/insns/pv_add_sci_h.h new file mode 100644 index 0000000000..907621c09f --- /dev/null +++ b/riscv/insns/pv_add_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_b.h b/riscv/insns/pv_and_b.h new file mode 100644 index 0000000000..d3711b762d --- /dev/null +++ b/riscv/insns/pv_and_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_h.h b/riscv/insns/pv_and_h.h new file mode 100644 index 0000000000..8bae35685b --- /dev/null +++ b/riscv/insns/pv_and_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sc_b.h b/riscv/insns/pv_and_sc_b.h new file mode 100644 index 0000000000..b1e6c865e5 --- /dev/null +++ b/riscv/insns/pv_and_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sc_h.h b/riscv/insns/pv_and_sc_h.h new file mode 100644 index 0000000000..2389d11e1b --- /dev/null +++ b/riscv/insns/pv_and_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} 
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sci_b.h b/riscv/insns/pv_and_sci_b.h new file mode 100644 index 0000000000..7e4e9e0ac5 --- /dev/null +++ b/riscv/insns/pv_and_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sci_h.h b/riscv/insns/pv_and_sci_h.h new file mode 100644 index 0000000000..fbd57d1163 --- /dev/null +++ b/riscv/insns/pv_and_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; /* one 16-bit lane of the result; lanes are packed high-to-low into simd_rd */ +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_b.h b/riscv/insns/pv_avg_b.h new file mode 100644 index 0000000000..3d5d6d4723 --- /dev/null +++ b/riscv/insns/pv_avg_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_h.h b/riscv/insns/pv_avg_h.h new file mode 100644 index 0000000000..725f2f2e09 --- /dev/null +++ b/riscv/insns/pv_avg_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sc_b.h b/riscv/insns/pv_avg_sc_b.h new file mode 100644 index 0000000000..0b7d2f8d2c --- /dev/null +++ b/riscv/insns/pv_avg_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +}
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sc_h.h b/riscv/insns/pv_avg_sc_h.h new file mode 100644 index 0000000000..8a6cb5e504 --- /dev/null +++ b/riscv/insns/pv_avg_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sci_b.h b/riscv/insns/pv_avg_sci_b.h new file mode 100644 index 0000000000..ff67065e34 --- /dev/null +++ b/riscv/insns/pv_avg_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sci_h.h b/riscv/insns/pv_avg_sci_h.h new file mode 100644 index 0000000000..f7deefd25b --- /dev/null +++ b/riscv/insns/pv_avg_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_b.h b/riscv/insns/pv_avgu_b.h new file mode 100644 index 0000000000..435c4d22cc --- /dev/null +++ b/riscv/insns/pv_avgu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_h.h b/riscv/insns/pv_avgu_h.h new file mode 100644 index 0000000000..3fdbaf4ddb --- /dev/null +++ b/riscv/insns/pv_avgu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + 
simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sc_b.h b/riscv/insns/pv_avgu_sc_b.h new file mode 100644 index 0000000000..47ca3888bc --- /dev/null +++ b/riscv/insns/pv_avgu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sc_h.h b/riscv/insns/pv_avgu_sc_h.h new file mode 100644 index 0000000000..0bf92f93b3 --- /dev/null +++ b/riscv/insns/pv_avgu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sci_b.h b/riscv/insns/pv_avgu_sci_b.h new file mode 100644 index 0000000000..fbc0dff921 --- /dev/null +++ b/riscv/insns/pv_avgu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sci_h.h b/riscv/insns/pv_avgu_sci_h.h new file mode 100644 index 0000000000..dd8cd35442 --- /dev/null +++ b/riscv/insns/pv_avgu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_dotsp_b.h b/riscv/insns/pv_dotsp_b.h new file mode 100644 index 0000000000..93b7233cca --- /dev/null +++ b/riscv/insns/pv_dotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * 
sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_h.h b/riscv/insns/pv_dotsp_h.h new file mode 100644 index 0000000000..9feed35efd --- /dev/null +++ b/riscv/insns/pv_dotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sc_b.h b/riscv/insns/pv_dotsp_sc_b.h new file mode 100644 index 0000000000..cef11d5e77 --- /dev/null +++ b/riscv/insns/pv_dotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sc_h.h b/riscv/insns/pv_dotsp_sc_h.h new file mode 100644 index 0000000000..ef558d39f0 --- /dev/null +++ b/riscv/insns/pv_dotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sci_b.h b/riscv/insns/pv_dotsp_sci_b.h new file mode 100644 index 0000000000..3470fd55a3 --- /dev/null +++ b/riscv/insns/pv_dotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sci_h.h b/riscv/insns/pv_dotsp_sci_h.h new file mode 100644 index 0000000000..97e30eb290 --- /dev/null +++ b/riscv/insns/pv_dotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_b.h b/riscv/insns/pv_dotup_b.h new file mode 100644 index 0000000000..fa77f36678 --- /dev/null +++ b/riscv/insns/pv_dotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_h.h 
b/riscv/insns/pv_dotup_h.h new file mode 100644 index 0000000000..4e170b238b --- /dev/null +++ b/riscv/insns/pv_dotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sc_b.h b/riscv/insns/pv_dotup_sc_b.h new file mode 100644 index 0000000000..a581d0162a --- /dev/null +++ b/riscv/insns/pv_dotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sc_h.h b/riscv/insns/pv_dotup_sc_h.h new file mode 100644 index 0000000000..b78762a871 --- /dev/null +++ b/riscv/insns/pv_dotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sci_b.h b/riscv/insns/pv_dotup_sci_b.h new file mode 100644 index 0000000000..0dedb1caf4 --- /dev/null +++ b/riscv/insns/pv_dotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sci_h.h b/riscv/insns/pv_dotup_sci_h.h new file mode 100644 index 0000000000..64a36d5690 --- /dev/null +++ b/riscv/insns/pv_dotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_b.h b/riscv/insns/pv_dotusp_b.h new file mode 100644 index 0000000000..1cdfc2f2c2 --- /dev/null +++ b/riscv/insns/pv_dotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_h.h b/riscv/insns/pv_dotusp_h.h new file mode 100644 index 0000000000..81968a14d6 --- /dev/null 
+++ b/riscv/insns/pv_dotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sc_b.h b/riscv/insns/pv_dotusp_sc_b.h new file mode 100644 index 0000000000..d562a7d4d3 --- /dev/null +++ b/riscv/insns/pv_dotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sc_h.h b/riscv/insns/pv_dotusp_sc_h.h new file mode 100644 index 0000000000..3815c3721f --- /dev/null +++ b/riscv/insns/pv_dotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sci_b.h b/riscv/insns/pv_dotusp_sci_b.h new file mode 100644 index 0000000000..92c229540e --- /dev/null +++ b/riscv/insns/pv_dotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sci_h.h b/riscv/insns/pv_dotusp_sci_h.h new file mode 100644 index 0000000000..8f91a89a33 --- /dev/null +++ b/riscv/insns/pv_dotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_extract_b.h b/riscv/insns/pv_extract_b.h new file mode 100644 index 0000000000..fce80bbb6e --- /dev/null +++ b/riscv/insns/pv_extract_b.h @@ -0,0 +1 @@ +WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/riscv/insns/pv_extract_h.h b/riscv/insns/pv_extract_h.h new file mode 100644 index 0000000000..ee35393d49 --- /dev/null +++ b/riscv/insns/pv_extract_h.h @@ -0,0 +1 @@ +WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git 
a/riscv/insns/pv_extractu_b.h b/riscv/insns/pv_extractu_b.h new file mode 100644 index 0000000000..c240233874 --- /dev/null +++ b/riscv/insns/pv_extractu_b.h @@ -0,0 +1 @@ +WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/riscv/insns/pv_extractu_h.h b/riscv/insns/pv_extractu_h.h new file mode 100644 index 0000000000..90b679afd0 --- /dev/null +++ b/riscv/insns/pv_extractu_h.h @@ -0,0 +1 @@ +WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/riscv/insns/pv_insert_b.h b/riscv/insns/pv_insert_b.h new file mode 100644 index 0000000000..5575e79671 --- /dev/null +++ b/riscv/insns/pv_insert_b.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x03; /* select which of the four rd bytes receives the 8-bit value */ + +ins_rd = (ins_rd & ~(0xFFu << ((xlen >> 2) * i))) | ((RS1_H(0) & 0xFFu) << ((xlen >> 2) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/riscv/insns/pv_insert_h.h b/riscv/insns/pv_insert_h.h new file mode 100644 index 0000000000..eccb0eda6d --- /dev/null +++ b/riscv/insns/pv_insert_h.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x01; /* select to which rd half to write the 16-bit value */ + +ins_rd = (ins_rd & ~(0xFFFFu << ((xlen >> 1) * i))) | ((RS1_H(0) & 0xFFFFu) << ((xlen >> 1) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/riscv/insns/pv_max_b.h b/riscv/insns/pv_max_b.h new file mode 100644 index 0000000000..4dc3e6be89 --- /dev/null +++ b/riscv/insns/pv_max_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_h.h b/riscv/insns/pv_max_h.h new file mode 100644 index 0000000000..c65a32da64 --- /dev/null +++ b/riscv/insns/pv_max_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sc_b.h b/riscv/insns/pv_max_sc_b.h new file mode 100644 index 0000000000..896087f623 --- /dev/null +++ b/riscv/insns/pv_max_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sc_h.h b/riscv/insns/pv_max_sc_h.h new file mode 100644 index 0000000000..fd55fb49b3 --- /dev/null +++ b/riscv/insns/pv_max_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sci_b.h b/riscv/insns/pv_max_sci_b.h new file mode 100644 index 0000000000..5e06669faa --- /dev/null +++ b/riscv/insns/pv_max_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > insn.p_simm6() ? 
RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sci_h.h b/riscv/insns/pv_max_sci_h.h new file mode 100644 index 0000000000..ce1df2ee1d --- /dev/null +++ b/riscv/insns/pv_max_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_b.h b/riscv/insns/pv_maxu_b.h new file mode 100644 index 0000000000..5821c17266 --- /dev/null +++ b/riscv/insns/pv_maxu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_h.h b/riscv/insns/pv_maxu_h.h new file mode 100644 index 0000000000..3e587c3c99 --- /dev/null +++ b/riscv/insns/pv_maxu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sc_b.h b/riscv/insns/pv_maxu_sc_b.h new file mode 100644 index 0000000000..c297b87ab2 --- /dev/null +++ b/riscv/insns/pv_maxu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sc_h.h b/riscv/insns/pv_maxu_sc_h.h new file mode 100644 index 0000000000..fbb5c7feb9 --- /dev/null +++ b/riscv/insns/pv_maxu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sci_b.h b/riscv/insns/pv_maxu_sci_b.h new file mode 100644 index 0000000000..ab5f6e5f98 --- /dev/null +++ b/riscv/insns/pv_maxu_sci_b.h @@ -0,0 +1,10 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > insn.p_zimm6() ? RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); + diff --git a/riscv/insns/pv_maxu_sci_h.h b/riscv/insns/pv_maxu_sci_h.h new file mode 100644 index 0000000000..9aaf9effc6 --- /dev/null +++ b/riscv/insns/pv_maxu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > insn.p_zimm6() ? RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_b.h b/riscv/insns/pv_min_b.h new file mode 100644 index 0000000000..1b9104b55f --- /dev/null +++ b/riscv/insns/pv_min_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_h.h b/riscv/insns/pv_min_h.h new file mode 100644 index 0000000000..bbc83caea8 --- /dev/null +++ b/riscv/insns/pv_min_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sc_b.h b/riscv/insns/pv_min_sc_b.h new file mode 100644 index 0000000000..1d2aac507b --- /dev/null +++ b/riscv/insns/pv_min_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sc_h.h b/riscv/insns/pv_min_sc_h.h new file mode 100644 index 0000000000..b2b8ab1109 --- /dev/null +++ b/riscv/insns/pv_min_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sci_b.h b/riscv/insns/pv_min_sci_b.h new file mode 100644 index 0000000000..031b51f362 --- /dev/null +++ b/riscv/insns/pv_min_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= insn.p_simm6() ? 
RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sci_h.h b/riscv/insns/pv_min_sci_h.h new file mode 100644 index 0000000000..d007e06620 --- /dev/null +++ b/riscv/insns/pv_min_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_b.h b/riscv/insns/pv_minu_b.h new file mode 100644 index 0000000000..bbb92ca55a --- /dev/null +++ b/riscv/insns/pv_minu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_h.h b/riscv/insns/pv_minu_h.h new file mode 100644 index 0000000000..fa7b0a4e21 --- /dev/null +++ b/riscv/insns/pv_minu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sc_b.h b/riscv/insns/pv_minu_sc_b.h new file mode 100644 index 0000000000..566bcce6da --- /dev/null +++ b/riscv/insns/pv_minu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sc_h.h b/riscv/insns/pv_minu_sc_h.h new file mode 100644 index 0000000000..7471d96780 --- /dev/null +++ b/riscv/insns/pv_minu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sci_b.h b/riscv/insns/pv_minu_sci_b.h new file mode 100644 index 0000000000..75c43787c3 --- /dev/null +++ b/riscv/insns/pv_minu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= insn.p_zimm6() ? RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sci_h.h b/riscv/insns/pv_minu_sci_h.h new file mode 100644 index 0000000000..c665e92f40 --- /dev/null +++ b/riscv/insns/pv_minu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= insn.p_zimm6() ? 
RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_b.h b/riscv/insns/pv_or_b.h new file mode 100644 index 0000000000..d27a6e5d17 --- /dev/null +++ b/riscv/insns/pv_or_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_h.h b/riscv/insns/pv_or_h.h new file mode 100644 index 0000000000..65b112893e --- /dev/null +++ b/riscv/insns/pv_or_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sc_b.h b/riscv/insns/pv_or_sc_b.h new file mode 100644 index 0000000000..cac508744b --- /dev/null +++ b/riscv/insns/pv_or_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sc_h.h b/riscv/insns/pv_or_sc_h.h new file mode 100644 index 0000000000..e6f567cf3b --- /dev/null +++ b/riscv/insns/pv_or_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sci_b.h b/riscv/insns/pv_or_sci_b.h new file mode 100644 index 0000000000..0cb7b5cb6c --- /dev/null +++ b/riscv/insns/pv_or_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} 
+WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sci_h.h b/riscv/insns/pv_or_sci_h.h new file mode 100644 index 0000000000..e95922e1eb --- /dev/null +++ b/riscv/insns/pv_or_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sdotsp_b.h b/riscv/insns/pv_sdotsp_b.h new file mode 100644 index 0000000000..812e3d4369 --- /dev/null +++ b/riscv/insns/pv_sdotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_h.h b/riscv/insns/pv_sdotsp_h.h new file mode 100644 index 0000000000..9ccfae939d --- /dev/null +++ b/riscv/insns/pv_sdotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sc_b.h b/riscv/insns/pv_sdotsp_sc_b.h new file mode 100644 index 0000000000..e665a669f2 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sc_h.h b/riscv/insns/pv_sdotsp_sc_h.h new file mode 100644 index 0000000000..fa1ca93fee --- /dev/null +++ b/riscv/insns/pv_sdotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sci_b.h b/riscv/insns/pv_sdotsp_sci_b.h new file mode 100644 index 0000000000..31aab1fe54 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + 
+WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sci_h.h b/riscv/insns/pv_sdotsp_sci_h.h new file mode 100644 index 0000000000..151d16a2e9 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_b.h b/riscv/insns/pv_sdotup_b.h new file mode 100644 index 0000000000..82e47b4f82 --- /dev/null +++ b/riscv/insns/pv_sdotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_h.h b/riscv/insns/pv_sdotup_h.h new file mode 100644 index 0000000000..de77009a07 --- /dev/null +++ b/riscv/insns/pv_sdotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sc_b.h b/riscv/insns/pv_sdotup_sc_b.h new file mode 100644 index 0000000000..717fffc119 --- /dev/null +++ b/riscv/insns/pv_sdotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sc_h.h b/riscv/insns/pv_sdotup_sc_h.h new file mode 100644 index 0000000000..ecf0485664 --- /dev/null +++ b/riscv/insns/pv_sdotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sci_b.h b/riscv/insns/pv_sdotup_sci_b.h new file mode 100644 index 0000000000..bd4d850e6d --- /dev/null +++ b/riscv/insns/pv_sdotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sci_h.h 
b/riscv/insns/pv_sdotup_sci_h.h new file mode 100644 index 0000000000..145e73717f --- /dev/null +++ b/riscv/insns/pv_sdotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_b.h b/riscv/insns/pv_sdotusp_b.h new file mode 100644 index 0000000000..05d268ed21 --- /dev/null +++ b/riscv/insns/pv_sdotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_h.h b/riscv/insns/pv_sdotusp_h.h new file mode 100644 index 0000000000..fdc550db1c --- /dev/null +++ b/riscv/insns/pv_sdotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sc_b.h b/riscv/insns/pv_sdotusp_sc_b.h new file mode 100644 index 0000000000..2840cd1483 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sc_h.h b/riscv/insns/pv_sdotusp_sc_h.h new file mode 100644 index 0000000000..ca4c25ac10 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sci_b.h b/riscv/insns/pv_sdotusp_sci_b.h new file mode 100644 index 0000000000..d6823f83a9 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sci_h.h 
b/riscv/insns/pv_sdotusp_sci_h.h new file mode 100644 index 0000000000..42c4fbe88b --- /dev/null +++ b/riscv/insns/pv_sdotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_shuffle2_b.h b/riscv/insns/pv_shuffle2_b.h new file mode 100644 index 0000000000..8dd4e99945 --- /dev/null +++ b/riscv/insns/pv_shuffle2_b.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [2] of second operand) +uint8_t byte_sel; // select which byte from source (bits [1:0] of second operand) +uint8_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + byte_sel = RS2_B(i) & 0x03; // bits [1:0] of RS2_B(i) + src_sel = (RS2_B(i) >> 2) & 0x01; // bit [2] of RS2_B(i) + source = src_sel ? RS1_B(byte_sel) : RD_B(byte_sel); + simd_rd <<= 8; + simd_rd += (uint32_t)source & 0x000000FF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_shuffle2_h.h b/riscv/insns/pv_shuffle2_h.h new file mode 100644 index 0000000000..362a4bdc77 --- /dev/null +++ b/riscv/insns/pv_shuffle2_h.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [1] of second operand) +uint8_t half_sel; // select which half from source (bit [0] of second operand) +uint16_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + half_sel = RS2_H(i) & 0x01; // bit [0] of RS2_H(i) + src_sel = (RS2_H(i) >> 1) & 0x01; // bit [1] of RS2_H(i) + source = src_sel ? 
RS1_H(half_sel) : RD_H(half_sel); + simd_rd <<= 16; + simd_rd += (uint32_t)source & 0x0000FFFF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_b.h b/riscv/insns/pv_sll_b.h new file mode 100644 index 0000000000..ca8bcd6883 --- /dev/null +++ b/riscv/insns/pv_sll_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_h.h b/riscv/insns/pv_sll_h.h new file mode 100644 index 0000000000..cb9200caca --- /dev/null +++ b/riscv/insns/pv_sll_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sc_b.h b/riscv/insns/pv_sll_sc_b.h new file mode 100644 index 0000000000..d320519982 --- /dev/null +++ b/riscv/insns/pv_sll_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sc_h.h b/riscv/insns/pv_sll_sc_h.h new file mode 100644 index 0000000000..e84cf0214a --- /dev/null +++ b/riscv/insns/pv_sll_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sci_b.h b/riscv/insns/pv_sll_sci_b.h new file mode 100644 index 0000000000..8e637bea82 --- /dev/null +++ b/riscv/insns/pv_sll_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; 
i--){ + temp = zext8(RS1_B(i)) << (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sci_h.h b/riscv/insns/pv_sll_sci_h.h new file mode 100644 index 0000000000..ec94a2e288 --- /dev/null +++ b/riscv/insns/pv_sll_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_b.h b/riscv/insns/pv_sra_b.h new file mode 100644 index 0000000000..9525a0afc1 --- /dev/null +++ b/riscv/insns/pv_sra_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_h.h b/riscv/insns/pv_sra_h.h new file mode 100644 index 0000000000..b3e8a0b944 --- /dev/null +++ b/riscv/insns/pv_sra_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sc_b.h b/riscv/insns/pv_sra_sc_b.h new file mode 100644 index 0000000000..9442d92804 --- /dev/null +++ b/riscv/insns/pv_sra_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sc_h.h b/riscv/insns/pv_sra_sc_h.h new file mode 100644 index 0000000000..1e012f750e --- /dev/null +++ b/riscv/insns/pv_sra_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = 
xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sci_b.h b/riscv/insns/pv_sra_sci_b.h new file mode 100644 index 0000000000..3dafb3cb51 --- /dev/null +++ b/riscv/insns/pv_sra_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sci_h.h b/riscv/insns/pv_sra_sci_h.h new file mode 100644 index 0000000000..4f56d0e5ee --- /dev/null +++ b/riscv/insns/pv_sra_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_b.h b/riscv/insns/pv_srl_b.h new file mode 100644 index 0000000000..37be2e23aa --- /dev/null +++ b/riscv/insns/pv_srl_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_h.h b/riscv/insns/pv_srl_h.h new file mode 100644 index 0000000000..1b35116d3b --- /dev/null +++ b/riscv/insns/pv_srl_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sc_b.h b/riscv/insns/pv_srl_sc_b.h new file mode 100644 index 0000000000..4b04ab6f74 --- /dev/null +++ b/riscv/insns/pv_srl_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t 
simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sc_h.h b/riscv/insns/pv_srl_sc_h.h new file mode 100644 index 0000000000..f49f784db8 --- /dev/null +++ b/riscv/insns/pv_srl_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sci_b.h b/riscv/insns/pv_srl_sci_b.h new file mode 100644 index 0000000000..b0b38f2a90 --- /dev/null +++ b/riscv/insns/pv_srl_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sci_h.h b/riscv/insns/pv_srl_sci_h.h new file mode 100644 index 0000000000..5aba29cc91 --- /dev/null +++ b/riscv/insns/pv_srl_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_b.h b/riscv/insns/pv_sub_b.h new file mode 100644 index 0000000000..2ce1fe224f --- /dev/null +++ b/riscv/insns/pv_sub_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_h.h b/riscv/insns/pv_sub_h.h new file mode 100644 index 0000000000..4ec5137268 --- /dev/null +++ b/riscv/insns/pv_sub_h.h @@ -0,0 +1,9 @@ +int16_t temp; 
+uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sc_b.h b/riscv/insns/pv_sub_sc_b.h new file mode 100644 index 0000000000..3375e64c24 --- /dev/null +++ b/riscv/insns/pv_sub_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sc_h.h b/riscv/insns/pv_sub_sc_h.h new file mode 100644 index 0000000000..4bb12839c4 --- /dev/null +++ b/riscv/insns/pv_sub_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sci_b.h b/riscv/insns/pv_sub_sci_b.h new file mode 100644 index 0000000000..20cc941239 --- /dev/null +++ b/riscv/insns/pv_sub_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sci_h.h b/riscv/insns/pv_sub_sci_h.h new file mode 100644 index 0000000000..50b11a6653 --- /dev/null +++ b/riscv/insns/pv_sub_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_b.h b/riscv/insns/pv_xor_b.h new file mode 100644 index 0000000000..2fc203b4d6 --- /dev/null +++ b/riscv/insns/pv_xor_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 
0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_h.h b/riscv/insns/pv_xor_h.h new file mode 100644 index 0000000000..56cf0b7c9a --- /dev/null +++ b/riscv/insns/pv_xor_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sc_b.h b/riscv/insns/pv_xor_sc_b.h new file mode 100644 index 0000000000..ed3d5075ab --- /dev/null +++ b/riscv/insns/pv_xor_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sc_h.h b/riscv/insns/pv_xor_sc_h.h new file mode 100644 index 0000000000..9d632f367b --- /dev/null +++ b/riscv/insns/pv_xor_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sci_b.h b/riscv/insns/pv_xor_sci_b.h new file mode 100644 index 0000000000..7ecbf94fc4 --- /dev/null +++ b/riscv/insns/pv_xor_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sci_h.h b/riscv/insns/pv_xor_sci_h.h new file mode 100644 index 0000000000..0a02ced601 --- /dev/null +++ b/riscv/insns/pv_xor_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ insn.p_simm6(); + 
simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/sc_d.h b/riscv/insns/sc_d.h index aeeabd350d..54023ed449 100644 --- a/riscv/insns/sc_d.h +++ b/riscv/insns/sc_d.h @@ -1,11 +1,11 @@ require_extension('A'); require_rv64; -if (MMU.check_load_reservation(RS1)) -{ + +bool have_reservation = MMU.check_load_reservation(RS1, 8); + +if (have_reservation) MMU.store_uint64(RS1, RS2); - WRITE_RD(0); -} -else - WRITE_RD(1); MMU.yield_load_reservation(); + +WRITE_RD(!have_reservation); diff --git a/riscv/insns/sc_w.h b/riscv/insns/sc_w.h index 4b4be50584..e430dcb2e5 100644 --- a/riscv/insns/sc_w.h +++ b/riscv/insns/sc_w.h @@ -1,10 +1,10 @@ require_extension('A'); -if (MMU.check_load_reservation(RS1)) -{ + +bool have_reservation = MMU.check_load_reservation(RS1, 4); + +if (have_reservation) MMU.store_uint32(RS1, RS2); - WRITE_RD(0); -} -else - WRITE_RD(1); MMU.yield_load_reservation(); + +WRITE_RD(!have_reservation); diff --git a/riscv/insns/sfence_vma.h b/riscv/insns/sfence_vma.h index fc4625f0bf..ff949c7fad 100644 --- a/riscv/insns/sfence_vma.h +++ b/riscv/insns/sfence_vma.h @@ -1,2 +1,8 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +require_extension('S'); +if (STATE.v) { + if (STATE.prv == PRV_U || get_field(STATE.hstatus, HSTATUS_VTVM)) + require_novirt(); +} else { + require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +} MMU.flush_tlb(); diff --git a/riscv/insns/sret.h b/riscv/insns/sret.h index ae841de93f..315f4f0ec2 100644 --- a/riscv/insns/sret.h +++ b/riscv/insns/sret.h @@ -1,9 +1,20 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TSR) ? PRV_M : PRV_S); -set_pc_and_serialize(p->get_state()->sepc); +require_extension('S'); +if (STATE.v) { + if (STATE.prv == PRV_U || get_field(STATE.hstatus, HSTATUS_VTSR)) + require_novirt(); +} else { + require_privilege(get_field(STATE.mstatus, MSTATUS_TSR) ? PRV_M : PRV_S); +} +reg_t next_pc = (STATE.v) ? 
p->get_state()->vsepc : p->get_state()->sepc; +set_pc_and_serialize(next_pc); reg_t s = STATE.mstatus; reg_t prev_prv = get_field(s, MSTATUS_SPP); s = set_field(s, MSTATUS_SIE, get_field(s, MSTATUS_SPIE)); s = set_field(s, MSTATUS_SPIE, 1); s = set_field(s, MSTATUS_SPP, PRV_U); -p->set_privilege(prev_prv); p->set_csr(CSR_MSTATUS, s); +p->set_privilege(prev_prv); +if (!STATE.v) { + reg_t prev_virt = get_field(STATE.hstatus, HSTATUS_SPV); + p->set_virt(prev_virt); +} diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h new file mode 100644 index 0000000000..0a14467f61 --- /dev/null +++ b/riscv/insns/vaadd_vv.h @@ -0,0 +1,2 @@ +// vaadd.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, +, true); diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h new file mode 100644 index 0000000000..ae00d8e46c --- /dev/null +++ b/riscv/insns/vaadd_vx.h @@ -0,0 +1,2 @@ +// vaadd.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, +, false); diff --git a/riscv/insns/vaaddu_vv.h b/riscv/insns/vaaddu_vv.h new file mode 100644 index 0000000000..2f3fe745e7 --- /dev/null +++ b/riscv/insns/vaaddu_vv.h @@ -0,0 +1,2 @@ +// vaaddu.vv vd, vs2, vs1 +VI_VVX_ULOOP_AVG(vs1, +, true); diff --git a/riscv/insns/vaaddu_vx.h b/riscv/insns/vaaddu_vx.h new file mode 100644 index 0000000000..0e9fddcb1b --- /dev/null +++ b/riscv/insns/vaaddu_vx.h @@ -0,0 +1,2 @@ +// vaaddu.vx vd, vs2, rs1 +VI_VVX_ULOOP_AVG(rs1, +, false); diff --git a/riscv/insns/vadc_vim.h b/riscv/insns/vadc_vim.h new file mode 100644 index 0000000000..824fac970e --- /dev/null +++ b/riscv/insns/vadc_vim.h @@ -0,0 +1,10 @@ +// vadc.vim vd, vs2, simm5, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vvm.h b/riscv/insns/vadc_vvm.h new file mode 100644 index 0000000000..2d6803a8b4 --- /dev/null +++ b/riscv/insns/vadc_vvm.h @@ -0,0 +1,10 
@@ +// vadc.vvm vd, vs2, vs1, v0 +VI_VV_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vxm.h b/riscv/insns/vadc_vxm.h new file mode 100644 index 0000000000..0d2d052cca --- /dev/null +++ b/riscv/insns/vadc_vxm.h @@ -0,0 +1,10 @@ +// vadc.vxm vd, vs2, rs1, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadd_vi.h b/riscv/insns/vadd_vi.h new file mode 100644 index 0000000000..45fc6b74e7 --- /dev/null +++ b/riscv/insns/vadd_vi.h @@ -0,0 +1,5 @@ +// vadd.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 + vs2; +}) diff --git a/riscv/insns/vadd_vv.h b/riscv/insns/vadd_vv.h new file mode 100644 index 0000000000..45c6bdcba7 --- /dev/null +++ b/riscv/insns/vadd_vv.h @@ -0,0 +1,5 @@ +// vadd.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 + vs2; +}) diff --git a/riscv/insns/vadd_vx.h b/riscv/insns/vadd_vx.h new file mode 100644 index 0000000000..33e72ee495 --- /dev/null +++ b/riscv/insns/vadd_vx.h @@ -0,0 +1,5 @@ +// vadd.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 + vs2; +}) diff --git a/riscv/insns/vamoaddei16_v.h b/riscv/insns/vamoaddei16_v.h new file mode 100644 index 0000000000..3cb3db709d --- /dev/null +++ b/riscv/insns/vamoaddei16_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e16); diff --git a/riscv/insns/vamoaddei32_v.h b/riscv/insns/vamoaddei32_v.h new file mode 100644 index 0000000000..2bd77fcbd2 --- /dev/null +++ b/riscv/insns/vamoaddei32_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e32); diff --git a/riscv/insns/vamoaddei64_v.h b/riscv/insns/vamoaddei64_v.h 
new file mode 100644 index 0000000000..79ca748205 --- /dev/null +++ b/riscv/insns/vamoaddei64_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e64); diff --git a/riscv/insns/vamoaddei8_v.h b/riscv/insns/vamoaddei8_v.h new file mode 100644 index 0000000000..06b8c79302 --- /dev/null +++ b/riscv/insns/vamoaddei8_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e8); diff --git a/riscv/insns/vamoandei16_v.h b/riscv/insns/vamoandei16_v.h new file mode 100644 index 0000000000..be119497f3 --- /dev/null +++ b/riscv/insns/vamoandei16_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e16); diff --git a/riscv/insns/vamoandei32_v.h b/riscv/insns/vamoandei32_v.h new file mode 100644 index 0000000000..71506704ff --- /dev/null +++ b/riscv/insns/vamoandei32_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e32); diff --git a/riscv/insns/vamoandei64_v.h b/riscv/insns/vamoandei64_v.h new file mode 100644 index 0000000000..3efae3b59f --- /dev/null +++ b/riscv/insns/vamoandei64_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e64); diff --git a/riscv/insns/vamoandei8_v.h b/riscv/insns/vamoandei8_v.h new file mode 100644 index 0000000000..c47645d3e0 --- /dev/null +++ b/riscv/insns/vamoandei8_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e8); diff --git a/riscv/insns/vamomaxei16_v.h b/riscv/insns/vamomaxei16_v.h new file mode 100644 index 0000000000..ca67893e99 --- /dev/null +++ b/riscv/insns/vamomaxei16_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? 
lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamomaxei32_v.h b/riscv/insns/vamomaxei32_v.h new file mode 100644 index 0000000000..b6823cd042 --- /dev/null +++ b/riscv/insns/vamomaxei32_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamomaxei64_v.h b/riscv/insns/vamomaxei64_v.h new file mode 100644 index 0000000000..46e8a3bbd1 --- /dev/null +++ b/riscv/insns/vamomaxei64_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamomaxei8_v.h b/riscv/insns/vamomaxei8_v.h new file mode 100644 index 0000000000..9697b3a4cb --- /dev/null +++ b/riscv/insns/vamomaxei8_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamomaxuei16_v.h b/riscv/insns/vamomaxuei16_v.h new file mode 100644 index 0000000000..e05971dfcf --- /dev/null +++ b/riscv/insns/vamomaxuei16_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamomaxuei32_v.h b/riscv/insns/vamomaxuei32_v.h new file mode 100644 index 0000000000..9b873543b9 --- /dev/null +++ b/riscv/insns/vamomaxuei32_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamomaxuei64_v.h b/riscv/insns/vamomaxuei64_v.h new file mode 100644 index 0000000000..bbfbc9f2a3 --- /dev/null +++ b/riscv/insns/vamomaxuei64_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamomaxuei8_v.h b/riscv/insns/vamomaxuei8_v.h new file mode 100644 index 0000000000..357ba2454a --- /dev/null +++ b/riscv/insns/vamomaxuei8_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? 
lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamominei16_v.h b/riscv/insns/vamominei16_v.h new file mode 100644 index 0000000000..9d1ecac643 --- /dev/null +++ b/riscv/insns/vamominei16_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamominei32_v.h b/riscv/insns/vamominei32_v.h new file mode 100644 index 0000000000..6cb8475e39 --- /dev/null +++ b/riscv/insns/vamominei32_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamominei64_v.h b/riscv/insns/vamominei64_v.h new file mode 100644 index 0000000000..9ef3d4ee3b --- /dev/null +++ b/riscv/insns/vamominei64_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamominei8_v.h b/riscv/insns/vamominei8_v.h new file mode 100644 index 0000000000..5c035ea47b --- /dev/null +++ b/riscv/insns/vamominei8_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamominuei16_v.h b/riscv/insns/vamominuei16_v.h new file mode 100644 index 0000000000..d4a8f89292 --- /dev/null +++ b/riscv/insns/vamominuei16_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamominuei32_v.h b/riscv/insns/vamominuei32_v.h new file mode 100644 index 0000000000..16296c5beb --- /dev/null +++ b/riscv/insns/vamominuei32_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamominuei64_v.h b/riscv/insns/vamominuei64_v.h new file mode 100644 index 0000000000..fd850fd063 --- /dev/null +++ b/riscv/insns/vamominuei64_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? 
lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamominuei8_v.h b/riscv/insns/vamominuei8_v.h new file mode 100644 index 0000000000..3749d0525d --- /dev/null +++ b/riscv/insns/vamominuei8_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamoorei16_v.h b/riscv/insns/vamoorei16_v.h new file mode 100644 index 0000000000..a5ba1caa74 --- /dev/null +++ b/riscv/insns/vamoorei16_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e16); diff --git a/riscv/insns/vamoorei32_v.h b/riscv/insns/vamoorei32_v.h new file mode 100644 index 0000000000..94e4458e49 --- /dev/null +++ b/riscv/insns/vamoorei32_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e32); diff --git a/riscv/insns/vamoorei64_v.h b/riscv/insns/vamoorei64_v.h new file mode 100644 index 0000000000..84e03944e5 --- /dev/null +++ b/riscv/insns/vamoorei64_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e64); diff --git a/riscv/insns/vamoorei8_v.h b/riscv/insns/vamoorei8_v.h new file mode 100644 index 0000000000..364035dbb2 --- /dev/null +++ b/riscv/insns/vamoorei8_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e8); diff --git a/riscv/insns/vamoswapei16_v.h b/riscv/insns/vamoswapei16_v.h new file mode 100644 index 0000000000..31ff021030 --- /dev/null +++ b/riscv/insns/vamoswapei16_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e16); diff --git a/riscv/insns/vamoswapei32_v.h b/riscv/insns/vamoswapei32_v.h new file mode 100644 index 0000000000..a5741929ab --- /dev/null +++ b/riscv/insns/vamoswapei32_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e32); diff --git a/riscv/insns/vamoswapei64_v.h b/riscv/insns/vamoswapei64_v.h new file mode 100644 index 0000000000..58bd035217 --- /dev/null +++ 
b/riscv/insns/vamoswapei64_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e64); diff --git a/riscv/insns/vamoswapei8_v.h b/riscv/insns/vamoswapei8_v.h new file mode 100644 index 0000000000..af37c8c3f8 --- /dev/null +++ b/riscv/insns/vamoswapei8_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e8); diff --git a/riscv/insns/vamoxorei16_v.h b/riscv/insns/vamoxorei16_v.h new file mode 100644 index 0000000000..61e8c3272c --- /dev/null +++ b/riscv/insns/vamoxorei16_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e16); diff --git a/riscv/insns/vamoxorei32_v.h b/riscv/insns/vamoxorei32_v.h new file mode 100644 index 0000000000..d48d951504 --- /dev/null +++ b/riscv/insns/vamoxorei32_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e32); diff --git a/riscv/insns/vamoxorei64_v.h b/riscv/insns/vamoxorei64_v.h new file mode 100644 index 0000000000..f7a3ca42e1 --- /dev/null +++ b/riscv/insns/vamoxorei64_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e64); diff --git a/riscv/insns/vamoxorei8_v.h b/riscv/insns/vamoxorei8_v.h new file mode 100644 index 0000000000..4b6c79824c --- /dev/null +++ b/riscv/insns/vamoxorei8_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e8); diff --git a/riscv/insns/vand_vi.h b/riscv/insns/vand_vi.h new file mode 100644 index 0000000000..dd9618ba94 --- /dev/null +++ b/riscv/insns/vand_vi.h @@ -0,0 +1,5 @@ +// vand.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 & vs2; +}) diff --git a/riscv/insns/vand_vv.h b/riscv/insns/vand_vv.h new file mode 100644 index 0000000000..65558e4b6a --- /dev/null +++ b/riscv/insns/vand_vv.h @@ -0,0 +1,5 @@ +// vand.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 & vs2; +}) diff --git a/riscv/insns/vand_vx.h b/riscv/insns/vand_vx.h new file mode 100644 index 0000000000..8eea1ed526 ---
/dev/null +++ b/riscv/insns/vand_vx.h @@ -0,0 +1,5 @@ +// vand.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 & vs2; +}) diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h new file mode 100644 index 0000000000..a45c18db7d --- /dev/null +++ b/riscv/insns/vasub_vv.h @@ -0,0 +1,2 @@ +// vasub.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, -, true); diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h new file mode 100644 index 0000000000..4e8dba1c9a --- /dev/null +++ b/riscv/insns/vasub_vx.h @@ -0,0 +1,2 @@ +// vasub.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, -, false); diff --git a/riscv/insns/vasubu_vv.h b/riscv/insns/vasubu_vv.h new file mode 100644 index 0000000000..8e2be01aca --- /dev/null +++ b/riscv/insns/vasubu_vv.h @@ -0,0 +1,2 @@ +// vasubu.vv vd, vs2, vs1 +VI_VVX_ULOOP_AVG(vs1, -, true); diff --git a/riscv/insns/vasubu_vx.h b/riscv/insns/vasubu_vx.h new file mode 100644 index 0000000000..3cc9ca8a01 --- /dev/null +++ b/riscv/insns/vasubu_vx.h @@ -0,0 +1,2 @@ +// vasubu.vx vd, vs2, rs1 +VI_VVX_ULOOP_AVG(rs1, -, false); diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h new file mode 100644 index 0000000000..325e40adb4 --- /dev/null +++ b/riscv/insns/vcompress_vm.h @@ -0,0 +1,33 @@ +// vcompress vd, vs2, vs1 +require(P.VU.vstart == 0); +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1); + +reg_t pos = 0; + +VI_GENERAL_LOOP_BASE + const int midx = i / 64; + const int mpos = i % 64; + + bool do_mask = (P.VU.elt(rs1_num, midx) >> mpos) & 0x1; + if (do_mask) { + switch (sew) { + case e8: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + case e16: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + case e32: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + default: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + } + + ++pos; + } +VI_LOOP_END; diff --git 
a/riscv/insns/vdiv_vv.h b/riscv/insns/vdiv_vv.h new file mode 100644 index 0000000000..0d4bd0d8e4 --- /dev/null +++ b/riscv/insns/vdiv_vv.h @@ -0,0 +1,10 @@ +// vdiv.vv vd, vs2, vs1 +VI_VV_LOOP +({ + if (vs1 == 0) + vd = -1; + else if (vs2 == (INT64_MIN >> (64 - sew)) && vs1 == -1) + vd = vs2; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdiv_vx.h b/riscv/insns/vdiv_vx.h new file mode 100644 index 0000000000..405295270e --- /dev/null +++ b/riscv/insns/vdiv_vx.h @@ -0,0 +1,10 @@ +// vdiv.vx vd, vs2, rs1 +VI_VX_LOOP +({ + if(rs1 == 0) + vd = -1; + else if(vs2 == (INT64_MIN >> (64 - sew)) && rs1 == -1) + vd = vs2; + else + vd = vs2 / rs1; +}) diff --git a/riscv/insns/vdivu_vv.h b/riscv/insns/vdivu_vv.h new file mode 100644 index 0000000000..ef6e777d6b --- /dev/null +++ b/riscv/insns/vdivu_vv.h @@ -0,0 +1,8 @@ +// vdivu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + if(vs1 == 0) + vd = -1; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdivu_vx.h b/riscv/insns/vdivu_vx.h new file mode 100644 index 0000000000..7ffe1c6803 --- /dev/null +++ b/riscv/insns/vdivu_vx.h @@ -0,0 +1,8 @@ +// vdivu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + if(rs1 == 0) + vd = -1; + else + vd = vs2 / rs1; +}) diff --git a/riscv/insns/vdot_vv.h b/riscv/insns/vdot_vv.h new file mode 100644 index 0000000000..7685230497 --- /dev/null +++ b/riscv/insns/vdot_vv.h @@ -0,0 +1,5 @@ +// vdot vd, vs2, vs1 +VI_VV_LOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vdotu_vv.h b/riscv/insns/vdotu_vv.h new file mode 100644 index 0000000000..9c4c59dde2 --- /dev/null +++ b/riscv/insns/vdotu_vv.h @@ -0,0 +1,5 @@ +// vdotu vd, vs2, vs1 +VI_VV_ULOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h new file mode 100644 index 0000000000..2b808e0ccd --- /dev/null +++ b/riscv/insns/vfadd_vf.h @@ -0,0 +1,11 @@ +// vfadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_add(rs1, vs2); +}, +{ + vd = f32_add(rs1, vs2); +}, +{ + vd = f64_add(rs1, vs2); +}) diff --git 
a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h new file mode 100644 index 0000000000..ce94921d56 --- /dev/null +++ b/riscv/insns/vfadd_vv.h @@ -0,0 +1,11 @@ +// vfadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_add(vs1, vs2); +}, +{ + vd = f32_add(vs1, vs2); +}, +{ + vd = f64_add(vs1, vs2); +}) diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h new file mode 100644 index 0000000000..658f28a23f --- /dev/null +++ b/riscv/insns/vfclass_v.h @@ -0,0 +1,11 @@ +// vfclass.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd.v = f16_classify(vs2); +}, +{ + vd.v = f32_classify(vs2); +}, +{ + vd.v = f64_classify(vs2); +}) diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h new file mode 100644 index 0000000000..c53b0e1fd6 --- /dev/null +++ b/riscv/insns/vfcvt_f_x_v.h @@ -0,0 +1,14 @@ +// vfcvt.f.x.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f16(vs2_i); +}, +{ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f32(vs2_i); +}, +{ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i64_to_f64(vs2_i); +}) diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h new file mode 100644 index 0000000000..bd03768dbd --- /dev/null +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -0,0 +1,14 @@ +// vfcvt.f.xu.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f16(vs2_u); +}, +{ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f32(vs2_u); +}, +{ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui64_to_f64(vs2_u); +}) diff --git a/riscv/insns/vfcvt_rtz_x_f_v.h b/riscv/insns/vfcvt_rtz_x_f_v.h new file mode 100644 index 0000000000..e7241bd033 --- /dev/null +++ b/riscv/insns/vfcvt_rtz_x_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_i64(vs2, softfloat_round_minMag, true); +}) diff --git
a/riscv/insns/vfcvt_rtz_xu_f_v.h b/riscv/insns/vfcvt_rtz_xu_f_v.h new file mode 100644 index 0000000000..d3d266d0c9 --- /dev/null +++ b/riscv/insns/vfcvt_rtz_xu_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.rtz.xu.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_ui64(vs2, softfloat_round_minMag, true); +}) diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h new file mode 100644 index 0000000000..01e5ca17f4 --- /dev/null +++ b/riscv/insns/vfcvt_x_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.x.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_i64(vs2, STATE.frm, true); +}) diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h new file mode 100644 index 0000000000..725cbda23a --- /dev/null +++ b/riscv/insns/vfcvt_xu_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.xu.f.v vd, vs2, vm +VI_VFP_VV_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_ui64(vs2, STATE.frm, true); +}) diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h new file mode 100644 index 0000000000..a703ef02bf --- /dev/null +++ b/riscv/insns/vfdiv_vf.h @@ -0,0 +1,11 @@ +// vfdiv.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_div(vs2, rs1); +}, +{ + vd = f32_div(vs2, rs1); +}, +{ + vd = f64_div(vs2, rs1); +}) diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h new file mode 100644 index 0000000000..c66d751659 --- /dev/null +++ b/riscv/insns/vfdiv_vv.h @@ -0,0 +1,11 @@ +// vfdiv.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_div(vs2, vs1); +}, +{ + vd = f32_div(vs2, vs1); +}, +{ + vd = f64_div(vs2, vs1); +}) diff --git a/riscv/insns/vfdot_vv.h
b/riscv/insns/vfdot_vv.h new file mode 100644 index 0000000000..8f5225acd4 --- /dev/null +++ b/riscv/insns/vfdot_vv.h @@ -0,0 +1,11 @@ +// vfdot.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_add(vd, f16_mul(vs2, vs1)); +}, +{ + vd = f32_add(vd, f32_mul(vs2, vs1)); +}, +{ + vd = f64_add(vd, f64_mul(vs2, vs1)); +}) diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h new file mode 100644 index 0000000000..309572374d --- /dev/null +++ b/riscv/insns/vfirst_m.h @@ -0,0 +1,20 @@ +// vfirst.m rd, vs2 +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +reg_t pos = -1; +for (reg_t i=P.VU.vstart; i < vl; ++i) { + VI_LOOP_ELEMENT_SKIP() + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + if (vs2_lsb) { + pos = i; + break; + } +} +P.VU.vstart = 0; +WRITE_RD(pos); diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h new file mode 100644 index 0000000000..61578d3318 --- /dev/null +++ b/riscv/insns/vfmacc_vf.h @@ -0,0 +1,11 @@ +// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * f[rs1]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, vs2, vd); +}, +{ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ + vd = f64_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h new file mode 100644 index 0000000000..499b1d4d22 --- /dev/null +++ b/riscv/insns/vfmacc_vv.h @@ -0,0 +1,11 @@ +// vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vs1, vs2, vd); +}, +{ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ + vd = f64_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h new file mode 100644 index 0000000000..2a01429506 --- /dev/null +++ b/riscv/insns/vfmadd_vf.h @@ -0,0 +1,11 @@ +// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(vd, rs1, vs2); +}, +{ + vd = f32_mulAdd(vd,
rs1, vs2); +}, +{ + vd = f64_mulAdd(vd, rs1, vs2); +}) diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h new file mode 100644 index 0000000000..7ef734f847 --- /dev/null +++ b/riscv/insns/vfmadd_vv.h @@ -0,0 +1,11 @@ +// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vd, vs1, vs2); +}, +{ + vd = f32_mulAdd(vd, vs1, vs2); +}, +{ + vd = f64_mulAdd(vd, vs1, vs2); +}) diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h new file mode 100644 index 0000000000..c4b74cbd54 --- /dev/null +++ b/riscv/insns/vfmax_vf.h @@ -0,0 +1,11 @@ +// vfmax +VI_VFP_VF_LOOP +({ + vd = f16_max(vs2, rs1); +}, +{ + vd = f32_max(vs2, rs1); +}, +{ + vd = f64_max(vs2, rs1); +}) diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h new file mode 100644 index 0000000000..6439c8997f --- /dev/null +++ b/riscv/insns/vfmax_vv.h @@ -0,0 +1,11 @@ +// vfmax +VI_VFP_VV_LOOP +({ + vd = f16_max(vs2, vs1); +}, +{ + vd = f32_max(vs2, vs1); +}, +{ + vd = f64_max(vs2, vs1); +}) diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h new file mode 100644 index 0000000000..c9b39fe052 --- /dev/null +++ b/riscv/insns/vfmerge_vfm.h @@ -0,0 +1,50 @@ +// vfmerge_vf vd, vs2, vs1, vm +VI_CHECK_SSS(false); +VI_VFP_COMMON; + +switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + case e32: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f32(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? 
rs1 : vs2; + } + break; + case e64: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f64(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + default: + require(0); + break; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h new file mode 100644 index 0000000000..1560cdf7dc --- /dev/null +++ b/riscv/insns/vfmin_vf.h @@ -0,0 +1,11 @@ +// vfmin vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_min(vs2, rs1); +}, +{ + vd = f32_min(vs2, rs1); +}, +{ + vd = f64_min(vs2, rs1); +}) diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h new file mode 100644 index 0000000000..882a774044 --- /dev/null +++ b/riscv/insns/vfmin_vv.h @@ -0,0 +1,11 @@ +// vfmin vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_min(vs2, vs1); +}, +{ + vd = f32_min(vs2, vs1); +}, +{ + vd = f64_min(vs2, vs1); +}) diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h new file mode 100644 index 0000000000..8af397b999 --- /dev/null +++ b/riscv/insns/vfmsac_vf.h @@ -0,0 +1,11 @@ +// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h new file mode 100644 index 0000000000..3bb50e50a9 --- /dev/null +++ b/riscv/insns/vfmsac_vv.h @@ -0,0 +1,11 @@ +// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h new file mode 100644 index 0000000000..ab77b4c6e1 --- /dev/null +++ b/riscv/insns/vfmsub_vf.h @@ -0,0 +1,11 @@ +// vfmsub: vd[i] = 
+(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, rs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h new file mode 100644 index 0000000000..3cac937fd1 --- /dev/null +++ b/riscv/insns/vfmsub_vv.h @@ -0,0 +1,11 @@ +// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, vs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h new file mode 100644 index 0000000000..f5f63e497a --- /dev/null +++ b/riscv/insns/vfmul_vf.h @@ -0,0 +1,11 @@ +// vfmul.vf vd, vs2, rs1, vm +VI_VFP_VF_LOOP +({ + vd = f16_mul(vs2, rs1); +}, +{ + vd = f32_mul(vs2, rs1); +}, +{ + vd = f64_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h new file mode 100644 index 0000000000..7930fd034e --- /dev/null +++ b/riscv/insns/vfmul_vv.h @@ -0,0 +1,11 @@ +// vfmul.vv vd, vs1, vs2, vm +VI_VFP_VV_LOOP +({ + vd = f16_mul(vs1, vs2); +}, +{ + vd = f32_mul(vs1, vs2); +}, +{ + vd = f64_mul(vs1, vs2); +}) diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h new file mode 100644 index 0000000000..4a81436c10 --- /dev/null +++ b/riscv/insns/vfmv_f_s.h @@ -0,0 +1,38 @@ +// vfmv_f_s: rd = vs2[0] (rs1=0) +require_vector(true); +require_fp; +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); +require(STATE.frm < 0x5); + +reg_t rs2_num = insn.rs2(); +uint64_t vs2_0 = 0; +const reg_t sew = P.VU.vsew; +switch(sew) { + case e16: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e32: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e64: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + default: + require(0); + break; +} + 
+// NaN-extend: set every bit above sew when FLEN > sew +if (FLEN > sew) { + vs2_0 = vs2_0 | (UINT64_MAX << sew); +} + +if (FLEN == 64) { + WRITE_FRD(f64(vs2_0)); +} else { + WRITE_FRD(f32(vs2_0)); +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h new file mode 100644 index 0000000000..52ed7b2d11 --- /dev/null +++ b/riscv/insns/vfmv_s_f.h @@ -0,0 +1,29 @@ +// vfmv_s_f: vd[0] = rs1 (vs2=0) +require_vector(true); +require_fp; +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); +require(STATE.frm < 0x5); + +reg_t vl = P.VU.vl; + +if (vl > 0 && P.VU.vstart < vl) { + reg_t rd_num = insn.rd(); + + switch(P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1).v; + break; + case e32: + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + case e64: + if (FLEN == 64) + P.VU.elt(rd_num, 0, true) = f64(FRS1).v; + else + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + } +} +P.VU.vstart = 0; diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h new file mode 100644 index 0000000000..fb9c78827e --- /dev/null +++ b/riscv/insns/vfmv_v_f.h @@ -0,0 +1,31 @@ +// vfmv.v.f vd, rs1 +require_align(insn.rd(), P.VU.vflmul); +VI_VFP_COMMON +switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + + vd = rs1; + } + break; + case e32: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f32(READ_FREG(rs1_num)); + + vd = rs1; + } + break; + case e64: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f64(READ_FREG(rs1_num)); + + vd = rs1; + } + break; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h new file mode 100644 index 0000000000..e9f3b25746 --- /dev/null +++ b/riscv/insns/vfncvt_f_f_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) =
f32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h new file mode 100644 index 0000000000..556ee3c878 --- /dev/null +++ b/riscv/insns/vfncvt_f_x_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.x.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h new file mode 100644 index 0000000000..0626ecb4ad --- /dev/null +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.xu.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h new file mode 100644 index 0000000000..7113fd572a --- /dev/null +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -0,0 +1,25 @@ +// vfncvt.rod.f.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +},
+false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h new file mode 100644 index 0000000000..1dfc6ecea4 --- /dev/null +++ b/riscv/insns/vfncvt_rtz_x_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.rtz.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i8(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h new file mode 100644 index 0000000000..c6adcec9ab --- /dev/null +++ b/riscv/insns/vfncvt_rtz_xu_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.rtz.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui8(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h new file mode 100644 index 0000000000..01b52a2af5 --- /dev/null +++ b/riscv/insns/vfncvt_x_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i8(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, 
STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, STATE.frm, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h new file mode 100644 index 0000000000..bb55ec318d --- /dev/null +++ b/riscv/insns/vfncvt_xu_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui8(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h new file mode 100644 index 0000000000..1b99302c6c --- /dev/null +++ b/riscv/insns/vfnmacc_vf.h @@ -0,0 +1,11 @@ +// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h new file mode 100644 index 0000000000..7200e063ab --- /dev/null +++ b/riscv/insns/vfnmacc_vv.h @@ -0,0 +1,11 @@ +// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vs2.v ^ F64_SIGN), vs1, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vf.h 
b/riscv/insns/vfnmadd_vf.h new file mode 100644 index 0000000000..cb9c217ff2 --- /dev/null +++ b/riscv/insns/vfnmadd_vf.h @@ -0,0 +1,11 @@ +// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h new file mode 100644 index 0000000000..7160ed7d6f --- /dev/null +++ b/riscv/insns/vfnmadd_vv.h @@ -0,0 +1,11 @@ +// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h new file mode 100644 index 0000000000..aa6baa30c6 --- /dev/null +++ b/riscv/insns/vfnmsac_vf.h @@ -0,0 +1,11 @@ +// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); +}, +{ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), vd); +}) diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h new file mode 100644 index 0000000000..47db61d2d0 --- /dev/null +++ b/riscv/insns/vfnmsac_vv.h @@ -0,0 +1,11 @@ +// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd); +}, +{ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h new file mode 100644 index 0000000000..43aa9e2685 --- /dev/null +++ b/riscv/insns/vfnmsub_vf.h @@ -0,0 +1,11 @@ +// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ 
+ vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, vs2); +}) diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h new file mode 100644 index 0000000000..2a45c8fca0 --- /dev/null +++ b/riscv/insns/vfnmsub_vv.h @@ -0,0 +1,11 @@ +// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, vs2); +}) diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h new file mode 100644 index 0000000000..b283343cc2 --- /dev/null +++ b/riscv/insns/vfrdiv_vf.h @@ -0,0 +1,11 @@ +// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_div(rs1, vs2); +}, +{ + vd = f32_div(rs1, vs2); +}, +{ + vd = f64_div(rs1, vs2); +}) diff --git a/riscv/insns/vfrece7_v.h b/riscv/insns/vfrece7_v.h new file mode 100644 index 0000000000..69c026b058 --- /dev/null +++ b/riscv/insns/vfrece7_v.h @@ -0,0 +1,11 @@ +// vfrece7.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd = f16_recip7(vs2); +}, +{ + vd = f32_recip7(vs2); +}, +{ + vd = f64_recip7(vs2); +}) diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h new file mode 100644 index 0000000000..f19ec59791 --- /dev/null +++ b/riscv/insns/vfredmax_vs.h @@ -0,0 +1,12 @@ +// vfredmax vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_max(vd_0, vs2); +}, +{ + vd_0 = f32_max(vd_0, vs2); +}, +{ + vd_0 = f64_max(vd_0, vs2); +}) diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h new file mode 100644 index 0000000000..e3cf151324 --- /dev/null +++ b/riscv/insns/vfredmin_vs.h @@ -0,0 +1,12 @@ +// vfredmin vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_min(vd_0, vs2); +}, +{ + vd_0 = f32_min(vd_0, vs2); +}, +{ + vd_0 = f64_min(vd_0, vs2); +}) diff
--git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h new file mode 100644 index 0000000000..2438a7ba9c --- /dev/null +++ b/riscv/insns/vfredosum_vs.h @@ -0,0 +1,12 @@ +// vfredosum: vd[0] = sum( vs2[*] , vs1[0] ) +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_add(vd_0, vs2); +}, +{ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h new file mode 100644 index 0000000000..bad7308e54 --- /dev/null +++ b/riscv/insns/vfredsum_vs.h @@ -0,0 +1,12 @@ +// vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) +bool is_propagate = true; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_add(vd_0, vs2); +}, +{ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfrsqrte7_v.h b/riscv/insns/vfrsqrte7_v.h new file mode 100644 index 0000000000..a073764127 --- /dev/null +++ b/riscv/insns/vfrsqrte7_v.h @@ -0,0 +1,11 @@ +// vfrsqrte7.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd = f16_rsqrte7(vs2); +}, +{ + vd = f32_rsqrte7(vs2); +}, +{ + vd = f64_rsqrte7(vs2); +}) diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h new file mode 100644 index 0000000000..7fb26a5b5d --- /dev/null +++ b/riscv/insns/vfrsub_vf.h @@ -0,0 +1,11 @@ +// vfrsub.vf vd, vs2, rs1 # vd[i] = f[rs1] - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_sub(rs1, vs2); +}, +{ + vd = f32_sub(rs1, vs2); +}, +{ + vd = f64_sub(rs1, vs2); +}) diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h new file mode 100644 index 0000000000..ce06185ee9 --- /dev/null +++ b/riscv/insns/vfsgnj_vf.h @@ -0,0 +1,11 @@ +// vfsgnj.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, false, false); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, false); +}) diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h new file mode 100644 index 0000000000..722cb29cf0 --- /dev/null +++ b/riscv/insns/vfsgnj_vv.h @@ -0,0 +1,11 @@ +// vfsgnj +VI_VFP_VV_LOOP +({ + vd =
fsgnj16(vs2.v, vs1.v, false, false); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, false); +}) diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h new file mode 100644 index 0000000000..e4894124aa --- /dev/null +++ b/riscv/insns/vfsgnjn_vf.h @@ -0,0 +1,11 @@ +// vfsgnn +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, true, false); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, true, false); +}) diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h new file mode 100644 index 0000000000..1d91f69199 --- /dev/null +++ b/riscv/insns/vfsgnjn_vv.h @@ -0,0 +1,11 @@ +// vfsgnn +VI_VFP_VV_LOOP +({ + vd = fsgnj16(vs2.v, vs1.v, true, false); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, true, false); +}) diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h new file mode 100644 index 0000000000..7be164c770 --- /dev/null +++ b/riscv/insns/vfsgnjx_vf.h @@ -0,0 +1,11 @@ +// vfsgnx +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, false, true); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, true); +}) diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h new file mode 100644 index 0000000000..b04b8454ac --- /dev/null +++ b/riscv/insns/vfsgnjx_vv.h @@ -0,0 +1,11 @@ +// vfsgnx +VI_VFP_VV_LOOP +({ + vd = fsgnj16(vs2.v, vs1.v, false, true); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, true); +}) diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h new file mode 100644 index 0000000000..66eeaccbf4 --- /dev/null +++ b/riscv/insns/vfslide1down_vf.h @@ -0,0 +1,36 @@ +//vfslide1down.vf vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +VI_VFP_LOOP_BASE +if (i != vl - 1) { + switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, 1); + vd 
= vs2; + } + break; + case e64: { + VI_XI_SLIDEDOWN_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, vl - 1, true) = f16(FRS1); + break; + case e32: + P.VU.elt(rd_num, vl - 1, true) = f32(FRS1); + break; + case e64: + P.VU.elt(rd_num, vl - 1, true) = f64(FRS1); + break; + } +} +VI_VFP_LOOP_END diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h new file mode 100644 index 0000000000..b9c2817c28 --- /dev/null +++ b/riscv/insns/vfslide1up_vf.h @@ -0,0 +1,36 @@ +//vfslide1up.vf vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +VI_VFP_LOOP_BASE +if (i != 0) { + switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEUP_PARAMS(e32, 1); + vd = vs2; + } + break; + case e64: { + VI_XI_SLIDEUP_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1); + break; + case e32: + P.VU.elt(rd_num, 0, true) = f32(FRS1); + break; + case e64: + P.VU.elt(rd_num, 0, true) = f64(FRS1); + break; + } +} +VI_VFP_LOOP_END diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h new file mode 100644 index 0000000000..86f0148d4b --- /dev/null +++ b/riscv/insns/vfsqrt_v.h @@ -0,0 +1,11 @@ +// vfsqrt.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd = f16_sqrt(vs2); +}, +{ + vd = f32_sqrt(vs2); +}, +{ + vd = f64_sqrt(vs2); +}) diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h new file mode 100644 index 0000000000..fc6877ca5a --- /dev/null +++ b/riscv/insns/vfsub_vf.h @@ -0,0 +1,11 @@ +// vfsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_sub(vs2, rs1); +}, +{ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h new file mode 100644 index 0000000000..b0403f1180 --- /dev/null +++ b/riscv/insns/vfsub_vv.h @@ -0,0 +1,11 @@ +// vfsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_sub(vs2, vs1); +}, +{ + vd = f32_sub(vs2, vs1);
+}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h new file mode 100644 index 0000000000..b8249001e0 --- /dev/null +++ b/riscv/insns/vfwadd_vf.h @@ -0,0 +1,8 @@ +// vfwadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_add(vs2, rs1); +}, +{ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h new file mode 100644 index 0000000000..7255a50e13 --- /dev/null +++ b/riscv/insns/vfwadd_vv.h @@ -0,0 +1,8 @@ +// vfwadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_add(vs2, vs1); +}, +{ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h new file mode 100644 index 0000000000..021b17f049 --- /dev/null +++ b/riscv/insns/vfwadd_wf.h @@ -0,0 +1,8 @@ +// vfwadd.wf vd, vs2, vs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f32_add(vs2, rs1); +}, +{ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h new file mode 100644 index 0000000000..c1ed038925 --- /dev/null +++ b/riscv/insns/vfwadd_wv.h @@ -0,0 +1,8 @@ +// vfwadd.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f32_add(vs2, vs1); +}, +{ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h new file mode 100644 index 0000000000..9bf3f386e3 --- /dev/null +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.f.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f64(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h new file mode 100644 index 0000000000..481f37122a --- /dev/null +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -0,0 +1,24 @@ +// vfwcvt.f.x.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = 
P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f64(vs2); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 8)) diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h new file mode 100644 index 0000000000..544f33dd4b --- /dev/null +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -0,0 +1,24 @@ +// vfwcvt.f.xu.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f64(vs2); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 8)) diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h new file mode 100644 index 0000000000..7cbcf3116e --- /dev/null +++ b/riscv/insns/vfwcvt_rtz_x_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, softfloat_round_minMag, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h new file mode 100644 index 0000000000..81be047a3c --- /dev/null +++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.rtz,xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = 
P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, softfloat_round_minMag, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h new file mode 100644 index 0000000000..ebd99c0cd1 --- /dev/null +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, STATE.frm, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h new file mode 100644 index 0000000000..55036f6c55 --- /dev/null +++ b/riscv/insns/vfwcvt_xu_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, STATE.frm, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h new file mode 100644 index 0000000000..441fa0a791 --- /dev/null +++ b/riscv/insns/vfwmacc_vf.h @@ -0,0 +1,8 @@ +// vfwmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ + vd = f64_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h new file mode 100644 index 0000000000..a654198bfd --- /dev/null +++ 
b/riscv/insns/vfwmacc_vv.h @@ -0,0 +1,8 @@ +// vfwmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ + vd = f64_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h new file mode 100644 index 0000000000..18010ff490 --- /dev/null +++ b/riscv/insns/vfwmsac_vf.h @@ -0,0 +1,8 @@ +// vfwmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h new file mode 100644 index 0000000000..9dc4073fef --- /dev/null +++ b/riscv/insns/vfwmsac_vv.h @@ -0,0 +1,8 @@ +// vfwmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h new file mode 100644 index 0000000000..2bb543f63a --- /dev/null +++ b/riscv/insns/vfwmul_vf.h @@ -0,0 +1,8 @@ +// vfwmul.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mul(vs2, rs1); +}, +{ + vd = f64_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h new file mode 100644 index 0000000000..2ce38e62c1 --- /dev/null +++ b/riscv/insns/vfwmul_vv.h @@ -0,0 +1,8 @@ +// vfwmul.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mul(vs2, vs1); +}, +{ + vd = f64_mul(vs2, vs1); +}) diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h new file mode 100644 index 0000000000..038bda08ca --- /dev/null +++ b/riscv/insns/vfwnmacc_vf.h @@ -0,0 +1,8 @@ +// vfwnmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h new file mode 100644 index 0000000000..bf863e04c2 --- /dev/null +++ b/riscv/insns/vfwnmacc_vv.h @@ -0,0 +1,8 @@ +// 
vfwnmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h new file mode 100644 index 0000000000..1e288e1b91 --- /dev/null +++ b/riscv/insns/vfwnmsac_vf.h @@ -0,0 +1,8 @@ +// vfwnmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h new file mode 100644 index 0000000000..ce97749e1c --- /dev/null +++ b/riscv/insns/vfwnmsac_vv.h @@ -0,0 +1,8 @@ +// vfwnmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h new file mode 100644 index 0000000000..1f42d8ff2f --- /dev/null +++ b/riscv/insns/vfwredosum_vs.h @@ -0,0 +1,9 @@ +// vfwredosum.vs vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h new file mode 100644 index 0000000000..4ef28969dc --- /dev/null +++ b/riscv/insns/vfwredsum_vs.h @@ -0,0 +1,9 @@ +// vfwredsum.vs vd, vs2, vs1 +bool is_propagate = true; +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h new file mode 100644 index 0000000000..8c37688419 --- /dev/null +++ b/riscv/insns/vfwsub_vf.h @@ -0,0 +1,8 @@ +// vfwsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h new file mode 100644 index 0000000000..ce08e36af7 --- /dev/null
+++ b/riscv/insns/vfwsub_vv.h @@ -0,0 +1,8 @@ +// vfwsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_sub(vs2, vs1); +}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h new file mode 100644 index 0000000000..f6f47ca5cf --- /dev/null +++ b/riscv/insns/vfwsub_wf.h @@ -0,0 +1,8 @@ +// vfwsub.wf vd, vs2, rs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h new file mode 100644 index 0000000000..eef904dcc2 --- /dev/null +++ b/riscv/insns/vfwsub_wv.h @@ -0,0 +1,8 @@ +// vfwsub.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f32_sub(vs2, vs1); +}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h new file mode 100644 index 0000000000..012d124a43 --- /dev/null +++ b/riscv/insns/vid_v.h @@ -0,0 +1,31 @@ +// vid.v vd, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +require_align(rd_num, P.VU.vflmul); +require_vm; + +for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { + VI_LOOP_ELEMENT_SKIP(); + + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = i; + break; + case e16: + P.VU.elt(rd_num, i, true) = i; + break; + case e32: + P.VU.elt(rd_num, i, true) = i; + break; + default: + P.VU.elt(rd_num, i, true) = i; + break; + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h new file mode 100644 index 0000000000..a4368254e3 --- /dev/null +++ b/riscv/insns/viota_m.h @@ -0,0 +1,53 @@ +// viota.m vd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +require_vm; +require_align(rd_num, P.VU.vflmul);
+require_noover(rd_num, P.VU.vflmul, rs2_num, 1); + +int cnt = 0; +for (reg_t i = 0; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + bool has_one = false; + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + if (vs2_lsb) { + has_one = true; + } + } + + bool use_ori = (insn.v_vm() == 0) && !do_mask; + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e16: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e32: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + default: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + } + + if (has_one) { + cnt++; + } +} + diff --git a/riscv/insns/vl1re16_v.h b/riscv/insns/vl1re16_v.h new file mode 100644 index 0000000000..220e83e6bb --- /dev/null +++ b/riscv/insns/vl1re16_v.h @@ -0,0 +1,2 @@ +// vl1re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl1re32_v.h b/riscv/insns/vl1re32_v.h new file mode 100644 index 0000000000..e72ca02a3d --- /dev/null +++ b/riscv/insns/vl1re32_v.h @@ -0,0 +1,2 @@ +// vl1re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl1re64_v.h b/riscv/insns/vl1re64_v.h new file mode 100644 index 0000000000..265701a06f --- /dev/null +++ b/riscv/insns/vl1re64_v.h @@ -0,0 +1,2 @@ +// vl1re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl1re8_v.h b/riscv/insns/vl1re8_v.h new file mode 100644 index 0000000000..b4ce661688 --- /dev/null +++ b/riscv/insns/vl1re8_v.h @@ -0,0 +1,2 @@ +// vl1re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl2re16_v.h b/riscv/insns/vl2re16_v.h new file mode 100644 index 0000000000..2846edd980 --- /dev/null +++ b/riscv/insns/vl2re16_v.h @@ -0,0 +1,2 @@ +// vl2re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl2re32_v.h
b/riscv/insns/vl2re32_v.h new file mode 100644 index 0000000000..5cea835524 --- /dev/null +++ b/riscv/insns/vl2re32_v.h @@ -0,0 +1,2 @@ +// vl2re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl2re64_v.h b/riscv/insns/vl2re64_v.h new file mode 100644 index 0000000000..efdf2ce2ac --- /dev/null +++ b/riscv/insns/vl2re64_v.h @@ -0,0 +1,2 @@ +// vl2re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl2re8_v.h b/riscv/insns/vl2re8_v.h new file mode 100644 index 0000000000..fcc3c4c057 --- /dev/null +++ b/riscv/insns/vl2re8_v.h @@ -0,0 +1,2 @@ +// vl2re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl4re16_v.h b/riscv/insns/vl4re16_v.h new file mode 100644 index 0000000000..0363418319 --- /dev/null +++ b/riscv/insns/vl4re16_v.h @@ -0,0 +1,2 @@ +// vl4re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl4re32_v.h b/riscv/insns/vl4re32_v.h new file mode 100644 index 0000000000..e37cc1ab7b --- /dev/null +++ b/riscv/insns/vl4re32_v.h @@ -0,0 +1,2 @@ +// vl4re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl4re64_v.h b/riscv/insns/vl4re64_v.h new file mode 100644 index 0000000000..11486f5d1e --- /dev/null +++ b/riscv/insns/vl4re64_v.h @@ -0,0 +1,2 @@ +// vl4re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl4re8_v.h b/riscv/insns/vl4re8_v.h new file mode 100644 index 0000000000..f9ce3ff7c7 --- /dev/null +++ b/riscv/insns/vl4re8_v.h @@ -0,0 +1,2 @@ +// vl4re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl8re16_v.h b/riscv/insns/vl8re16_v.h new file mode 100644 index 0000000000..0b3f1413ba --- /dev/null +++ b/riscv/insns/vl8re16_v.h @@ -0,0 +1,2 @@ +// vl8re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl8re32_v.h b/riscv/insns/vl8re32_v.h new file mode 100644 index 0000000000..3372b89d05 --- /dev/null +++ b/riscv/insns/vl8re32_v.h @@ -0,0 +1,2 @@ +// vl8re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl8re64_v.h b/riscv/insns/vl8re64_v.h new file 
mode 100644 index 0000000000..f9a9ca981f --- /dev/null +++ b/riscv/insns/vl8re64_v.h @@ -0,0 +1,2 @@ +// vl8re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl8re8_v.h b/riscv/insns/vl8re8_v.h new file mode 100644 index 0000000000..ee05e81a9d --- /dev/null +++ b/riscv/insns/vl8re8_v.h @@ -0,0 +1,2 @@ +// vl8re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vle16_v.h b/riscv/insns/vle16_v.h new file mode 100644 index 0000000000..7bd2e837af --- /dev/null +++ b/riscv/insns/vle16_v.h @@ -0,0 +1,2 @@ +// vle16.v and vlseg[2-8]e16.v +VI_LD(0, (i * nf + fn), int16); diff --git a/riscv/insns/vle16ff_v.h b/riscv/insns/vle16ff_v.h new file mode 100644 index 0000000000..53c8889137 --- /dev/null +++ b/riscv/insns/vle16ff_v.h @@ -0,0 +1,2 @@ +// vle16ff.v and vlseg[2-8]e16ff.v +VI_LDST_FF(int16); diff --git a/riscv/insns/vle32_v.h b/riscv/insns/vle32_v.h new file mode 100644 index 0000000000..9399fd621f --- /dev/null +++ b/riscv/insns/vle32_v.h @@ -0,0 +1,2 @@ +// vle32.v and vlseg[2-8]e32.v +VI_LD(0, (i * nf + fn), int32); diff --git a/riscv/insns/vle32ff_v.h b/riscv/insns/vle32ff_v.h new file mode 100644 index 0000000000..7d03d7ddd5 --- /dev/null +++ b/riscv/insns/vle32ff_v.h @@ -0,0 +1,2 @@ +// vle32ff.v and vlseg[2-8]e32ff.v +VI_LDST_FF(int32); diff --git a/riscv/insns/vle64_v.h b/riscv/insns/vle64_v.h new file mode 100644 index 0000000000..3f2654dd8b --- /dev/null +++ b/riscv/insns/vle64_v.h @@ -0,0 +1,2 @@ +// vle64.v and vlseg[2-8]e64.v +VI_LD(0, (i * nf + fn), int64); diff --git a/riscv/insns/vle64ff_v.h b/riscv/insns/vle64ff_v.h new file mode 100644 index 0000000000..39996da6f1 --- /dev/null +++ b/riscv/insns/vle64ff_v.h @@ -0,0 +1,2 @@ +// vle64ff.v and vlseg[2-8]e64ff.v +VI_LDST_FF(int64); diff --git a/riscv/insns/vle8_v.h b/riscv/insns/vle8_v.h new file mode 100644 index 0000000000..5613a1dd3e --- /dev/null +++ b/riscv/insns/vle8_v.h @@ -0,0 +1,2 @@ +// vle8.v and vlseg[2-8]e8.v +VI_LD(0, (i * nf + fn), int8); diff --git 
a/riscv/insns/vle8ff_v.h b/riscv/insns/vle8ff_v.h new file mode 100644 index 0000000000..b56d1d339c --- /dev/null +++ b/riscv/insns/vle8ff_v.h @@ -0,0 +1,2 @@ +// vle8ff.v and vlseg[2-8]e8ff.v +VI_LDST_FF(int8); diff --git a/riscv/insns/vlse16_v.h b/riscv/insns/vlse16_v.h new file mode 100644 index 0000000000..7622ded97d --- /dev/null +++ b/riscv/insns/vlse16_v.h @@ -0,0 +1,2 @@ +// vlse16.v and vlsseg[2-8]e16.v +VI_LD(i * RS2, fn, int16); diff --git a/riscv/insns/vlse32_v.h b/riscv/insns/vlse32_v.h new file mode 100644 index 0000000000..1afc5e9cf4 --- /dev/null +++ b/riscv/insns/vlse32_v.h @@ -0,0 +1,2 @@ +// vlse32.v and vlsseg[2-8]e32.v +VI_LD(i * RS2, fn, int32); diff --git a/riscv/insns/vlse64_v.h b/riscv/insns/vlse64_v.h new file mode 100644 index 0000000000..c6d999955e --- /dev/null +++ b/riscv/insns/vlse64_v.h @@ -0,0 +1,2 @@ +// vlse64.v and vlsseg[2-8]e64.v +VI_LD(i * RS2, fn, int64); diff --git a/riscv/insns/vlse8_v.h b/riscv/insns/vlse8_v.h new file mode 100644 index 0000000000..021a1fbcb4 --- /dev/null +++ b/riscv/insns/vlse8_v.h @@ -0,0 +1,2 @@ +// vlse8.v and vlsseg[2-8]e8.v +VI_LD(i * RS2, fn, int8); diff --git a/riscv/insns/vlxei16_v.h b/riscv/insns/vlxei16_v.h new file mode 100644 index 0000000000..6e4ed49b57 --- /dev/null +++ b/riscv/insns/vlxei16_v.h @@ -0,0 +1,2 @@ +// vlxei16.v and vlxseg[2-8]ei16.v +VI_LD_INDEX(e16, true); diff --git a/riscv/insns/vlxei32_v.h b/riscv/insns/vlxei32_v.h new file mode 100644 index 0000000000..a7da8ff035 --- /dev/null +++ b/riscv/insns/vlxei32_v.h @@ -0,0 +1,2 @@ +// vlxei32.v and vlxseg[2-8]ei32.v +VI_LD_INDEX(e32, true); diff --git a/riscv/insns/vlxei64_v.h b/riscv/insns/vlxei64_v.h new file mode 100644 index 0000000000..067224e4c5 --- /dev/null +++ b/riscv/insns/vlxei64_v.h @@ -0,0 +1,3 @@ +// vlxei64.v and vlxseg[2-8]ei64.v +VI_LD_INDEX(e64, true); + diff --git a/riscv/insns/vlxei8_v.h b/riscv/insns/vlxei8_v.h new file mode 100644 index 0000000000..d27304996b --- /dev/null +++ b/riscv/insns/vlxei8_v.h @@ -0,0
+1,2 @@ +// vlxei8.v and vlxseg[2-8]ei8.v +VI_LD_INDEX(e8, true); diff --git a/riscv/insns/vmacc_vv.h b/riscv/insns/vmacc_vv.h new file mode 100644 index 0000000000..e6ec93ff71 --- /dev/null +++ b/riscv/insns/vmacc_vv.h @@ -0,0 +1,5 @@ +// vmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i] +VI_VV_LOOP +({ + vd = vs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmacc_vx.h b/riscv/insns/vmacc_vx.h new file mode 100644 index 0000000000..d40b264a05 --- /dev/null +++ b/riscv/insns/vmacc_vx.h @@ -0,0 +1,5 @@ +// vmacc.vx: vd[i] = +(x[rs1] * vs2[i]) + vd[i] +VI_VX_LOOP +({ + vd = rs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h new file mode 100644 index 0000000000..afdca7e3c5 --- /dev/null +++ b/riscv/insns/vmadc_vim.h @@ -0,0 +1,13 @@ +// vmadc.vim vd, vs2, simm5 +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h new file mode 100644 index 0000000000..a5d54c6dff --- /dev/null +++ b/riscv/insns/vmadc_vvm.h @@ -0,0 +1,13 @@ +// vmadc.vvm vd, vs2, rs1 +VI_VV_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? 
(v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h new file mode 100644 index 0000000000..ca0342e0ce --- /dev/null +++ b/riscv/insns/vmadc_vxm.h @@ -0,0 +1,13 @@ +// vmadc.vxm vd, vs2, rs1 +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadd_vv.h b/riscv/insns/vmadd_vv.h new file mode 100644 index 0000000000..a1c0d2ed64 --- /dev/null +++ b/riscv/insns/vmadd_vv.h @@ -0,0 +1,5 @@ +// vmadd: vd[i] = (vd[i] * vs1[i]) + vs2[i] +VI_VV_LOOP +({ + vd = vd * vs1 + vs2; +}) diff --git a/riscv/insns/vmadd_vx.h b/riscv/insns/vmadd_vx.h new file mode 100644 index 0000000000..1a8a001593 --- /dev/null +++ b/riscv/insns/vmadd_vx.h @@ -0,0 +1,5 @@ +// vmadd: vd[i] = (vd[i] * x[rs1]) + vs2[i] +VI_VX_LOOP +({ + vd = vd * rs1 + vs2; +}) diff --git a/riscv/insns/vmand_mm.h b/riscv/insns/vmand_mm.h new file mode 100644 index 0000000000..04615c60fc --- /dev/null +++ b/riscv/insns/vmand_mm.h @@ -0,0 +1,2 @@ +// vmand.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & vs1); diff --git a/riscv/insns/vmandnot_mm.h b/riscv/insns/vmandnot_mm.h new file mode 100644 index 0000000000..4c26469c7e --- /dev/null +++ b/riscv/insns/vmandnot_mm.h @@ -0,0 +1,2 @@ +// vmandnot.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & ~vs1); diff --git a/riscv/insns/vmax_vv.h b/riscv/insns/vmax_vv.h new file mode 100644 index 0000000000..b9f15c5f18 --- /dev/null +++ b/riscv/insns/vmax_vv.h @@ -0,0 +1,10 @@ +// vmax.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_LOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2;
+ } + +}) diff --git a/riscv/insns/vmax_vx.h b/riscv/insns/vmax_vx.h new file mode 100644 index 0000000000..06f3f43160 --- /dev/null +++ b/riscv/insns/vmax_vx.h @@ -0,0 +1,10 @@ +// vmax.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_LOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmaxu_vv.h b/riscv/insns/vmaxu_vv.h new file mode 100644 index 0000000000..4e6868d19e --- /dev/null +++ b/riscv/insns/vmaxu_vv.h @@ -0,0 +1,9 @@ +// vmaxu.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_ULOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmaxu_vx.h b/riscv/insns/vmaxu_vx.h new file mode 100644 index 0000000000..cab89188f7 --- /dev/null +++ b/riscv/insns/vmaxu_vx.h @@ -0,0 +1,9 @@ +// vmaxu.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_ULOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h new file mode 100644 index 0000000000..fd6ae1cdc4 --- /dev/null +++ b/riscv/insns/vmerge_vim.h @@ -0,0 +1,11 @@ +// vmerge.vim vd, vs2, simm5 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? simm5 : vs2; +}) diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h new file mode 100644 index 0000000000..df416b2c78 --- /dev/null +++ b/riscv/insns/vmerge_vvm.h @@ -0,0 +1,11 @@ +// vmerge.vvm vd, vs2, vs1 +require_vector(true); +VI_CHECK_SSS(true); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? 
vs1 : vs2; +}) diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h new file mode 100644 index 0000000000..122a7b733e --- /dev/null +++ b/riscv/insns/vmerge_vxm.h @@ -0,0 +1,11 @@ +// vmerge.vxm vd, vs2, rs1 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; +}) diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h new file mode 100644 index 0000000000..040f2b0b9d --- /dev/null +++ b/riscv/insns/vmfeq_vf.h @@ -0,0 +1,12 @@ +// vmfeq.vf vd, vs2, fs1 +VI_VFP_LOOP_CMP +({ + res = f16_eq(vs2, rs1); +}, +{ + res = f32_eq(vs2, rs1); +}, +{ + res = f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h new file mode 100644 index 0000000000..fb24d1329c --- /dev/null +++ b/riscv/insns/vmfeq_vv.h @@ -0,0 +1,12 @@ +// vmfeq.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = f16_eq(vs2, vs1); +}, +{ + res = f32_eq(vs2, vs1); +}, +{ + res = f64_eq(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h new file mode 100644 index 0000000000..9e69855b51 --- /dev/null +++ b/riscv/insns/vmfge_vf.h @@ -0,0 +1,12 @@ +// vmfge.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_le(rs1, vs2); +}, +{ + res = f32_le(rs1, vs2); +}, +{ + res = f64_le(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h new file mode 100644 index 0000000000..bd5d99b70d --- /dev/null +++ b/riscv/insns/vmfgt_vf.h @@ -0,0 +1,12 @@ +// vmfgt.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_lt(rs1, vs2); +}, +{ + res = f32_lt(rs1, vs2); +}, +{ + res = f64_lt(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h new file mode 100644 index 0000000000..3d2852fca9 --- /dev/null +++ b/riscv/insns/vmfle_vf.h @@ -0,0 +1,12 @@ +// vmfle.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_le(vs2, rs1); +}, +{ + res = f32_le(vs2, rs1); +}, +{ + res = 
f64_le(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h new file mode 100644 index 0000000000..203ef210ff --- /dev/null +++ b/riscv/insns/vmfle_vv.h @@ -0,0 +1,12 @@ +// vmfle.vv vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_le(vs2, vs1); +}, +{ + res = f32_le(vs2, vs1); +}, +{ + res = f64_le(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h new file mode 100644 index 0000000000..4780adc556 --- /dev/null +++ b/riscv/insns/vmflt_vf.h @@ -0,0 +1,12 @@ +// vmflt.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_lt(vs2, rs1); +}, +{ + res = f32_lt(vs2, rs1); +}, +{ + res = f64_lt(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h new file mode 100644 index 0000000000..cdfc3fae90 --- /dev/null +++ b/riscv/insns/vmflt_vv.h @@ -0,0 +1,12 @@ +// vmflt.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = f16_lt(vs2, vs1); +}, +{ + res = f32_lt(vs2, vs1); +}, +{ + res = f64_lt(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h new file mode 100644 index 0000000000..84016993ae --- /dev/null +++ b/riscv/insns/vmfne_vf.h @@ -0,0 +1,12 @@ +// vmfne.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = !f16_eq(vs2, rs1); +}, +{ + res = !f32_eq(vs2, rs1); +}, +{ + res = !f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h new file mode 100644 index 0000000000..50dfa9c047 --- /dev/null +++ b/riscv/insns/vmfne_vv.h @@ -0,0 +1,12 @@ +// vmfne.vv vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = !f16_eq(vs2, vs1); +}, +{ + res = !f32_eq(vs2, vs1); +}, +{ + res = !f64_eq(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmin_vv.h b/riscv/insns/vmin_vv.h new file mode 100644 index 0000000000..21da0b3c5b --- /dev/null +++ b/riscv/insns/vmin_vv.h @@ -0,0 +1,11 @@ +// vmin.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_LOOP +({ + if (vs1 <= vs2) { + vd = vs1; + } else { + vd = vs2; + } + + +}) diff --git a/riscv/insns/vmin_vx.h b/riscv/insns/vmin_vx.h 
new file mode 100644 index 0000000000..3291776d05 --- /dev/null +++ b/riscv/insns/vmin_vx.h @@ -0,0 +1,11 @@ +// vminx.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_LOOP +({ + if (rs1 <= vs2) { + vd = rs1; + } else { + vd = vs2; + } + + +}) diff --git a/riscv/insns/vminu_vv.h b/riscv/insns/vminu_vv.h new file mode 100644 index 0000000000..c0ab1958d0 --- /dev/null +++ b/riscv/insns/vminu_vv.h @@ -0,0 +1,9 @@ +// vminu.vv vd, vs2, vs1, vm # Vector-vector +VI_VV_ULOOP +({ + if (vs1 <= vs2) { + vd = vs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vminu_vx.h b/riscv/insns/vminu_vx.h new file mode 100644 index 0000000000..1055895ac3 --- /dev/null +++ b/riscv/insns/vminu_vx.h @@ -0,0 +1,10 @@ +// vminu.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_ULOOP +({ + if (rs1 <= vs2) { + vd = rs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h new file mode 100644 index 0000000000..5a3ab090ae --- /dev/null +++ b/riscv/insns/vmnand_mm.h @@ -0,0 +1,2 @@ +// vmnand.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 & vs1)); diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h new file mode 100644 index 0000000000..ab933786c9 --- /dev/null +++ b/riscv/insns/vmnor_mm.h @@ -0,0 +1,2 @@ +// vmnor.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 | vs1)); diff --git a/riscv/insns/vmor_mm.h b/riscv/insns/vmor_mm.h new file mode 100644 index 0000000000..32e71b934a --- /dev/null +++ b/riscv/insns/vmor_mm.h @@ -0,0 +1,2 @@ +// vmor.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 | vs1); diff --git a/riscv/insns/vmornot_mm.h b/riscv/insns/vmornot_mm.h new file mode 100644 index 0000000000..bdc1d8b6e7 --- /dev/null +++ b/riscv/insns/vmornot_mm.h @@ -0,0 +1,2 @@ +// vmornot.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 | ~vs1); diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h new file mode 100644 index 0000000000..ff95464d05 --- /dev/null +++ b/riscv/insns/vmsbc_vvm.h @@ -0,0 +1,13 @@ +// vmsbc.vvm vd, vs2, rs1 +VI_VV_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const 
uint64_t mmask = UINT64_C(1) << mpos; + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs2) - (op_mask & vs1) - carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h new file mode 100644 index 0000000000..29fa012af9 --- /dev/null +++ b/riscv/insns/vmsbc_vxm.h @@ -0,0 +1,13 @@ +// vmsbc.vxm vd, vs2, rs1 +VI_XI_LOOP_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs2) - (op_mask & rs1) - carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h new file mode 100644 index 0000000000..a4195cfd08 --- /dev/null +++ b/riscv/insns/vmsbf_m.h @@ -0,0 +1,32 @@ +// vmsbf.m vd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + auto &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmseq_vi.h b/riscv/insns/vmseq_vi.h new file mode 100644 index 0000000000..cfc16825f2 --- /dev/null +++ 
b/riscv/insns/vmseq_vi.h @@ -0,0 +1,5 @@ +// vmseq.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = simm5 == vs2; +}) diff --git a/riscv/insns/vmseq_vv.h b/riscv/insns/vmseq_vv.h new file mode 100644 index 0000000000..91fd204a50 --- /dev/null +++ b/riscv/insns/vmseq_vv.h @@ -0,0 +1,6 @@ +// vmseq.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 == vs1; +}) + diff --git a/riscv/insns/vmseq_vx.h b/riscv/insns/vmseq_vx.h new file mode 100644 index 0000000000..ab63323134 --- /dev/null +++ b/riscv/insns/vmseq_vx.h @@ -0,0 +1,5 @@ +// vmseq.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = rs1 == vs2; +}) diff --git a/riscv/insns/vmsgt_vi.h b/riscv/insns/vmsgt_vi.h new file mode 100644 index 0000000000..4f7dea8e4b --- /dev/null +++ b/riscv/insns/vmsgt_vi.h @@ -0,0 +1,5 @@ +// vmsgt.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 > simm5; +}) diff --git a/riscv/insns/vmsgt_vx.h b/riscv/insns/vmsgt_vx.h new file mode 100644 index 0000000000..5f24db6964 --- /dev/null +++ b/riscv/insns/vmsgt_vx.h @@ -0,0 +1,5 @@ +// vmsgt.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 > rs1; +}) diff --git a/riscv/insns/vmsgtu_vi.h b/riscv/insns/vmsgtu_vi.h new file mode 100644 index 0000000000..be28fee1e7 --- /dev/null +++ b/riscv/insns/vmsgtu_vi.h @@ -0,0 +1,5 @@ +// vmsgtu.vi vd, vs2, simm5 +VI_VI_ULOOP_CMP +({ + res = vs2 > (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); +}) diff --git a/riscv/insns/vmsgtu_vx.h b/riscv/insns/vmsgtu_vx.h new file mode 100644 index 0000000000..7f39800804 --- /dev/null +++ b/riscv/insns/vmsgtu_vx.h @@ -0,0 +1,5 @@ +// vmsgtu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 > rs1; +}) diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h new file mode 100644 index 0000000000..a16ef681c4 --- /dev/null +++ b/riscv/insns/vmsif_m.h @@ -0,0 +1,32 @@ +// vmsif.m vd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); 
+reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart ; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + auto &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmsle_vi.h b/riscv/insns/vmsle_vi.h new file mode 100644 index 0000000000..f0f67d0213 --- /dev/null +++ b/riscv/insns/vmsle_vi.h @@ -0,0 +1,5 @@ +// vmsle.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 <= simm5; +}) diff --git a/riscv/insns/vmsle_vv.h b/riscv/insns/vmsle_vv.h new file mode 100644 index 0000000000..30aba06d41 --- /dev/null +++ b/riscv/insns/vmsle_vv.h @@ -0,0 +1,5 @@ +// vmsle.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 <= vs1; +}) diff --git a/riscv/insns/vmsle_vx.h b/riscv/insns/vmsle_vx.h new file mode 100644 index 0000000000..c26d59692e --- /dev/null +++ b/riscv/insns/vmsle_vx.h @@ -0,0 +1,5 @@ +// vmsle.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 <= rs1; +}) diff --git a/riscv/insns/vmsleu_vi.h b/riscv/insns/vmsleu_vi.h new file mode 100644 index 0000000000..0e66b781a0 --- /dev/null +++ b/riscv/insns/vmsleu_vi.h @@ -0,0 +1,5 @@ +// vmsleu.vi vd, vs2, simm5 +VI_VI_ULOOP_CMP +({ + res = vs2 <= (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); +}) diff --git a/riscv/insns/vmsleu_vv.h b/riscv/insns/vmsleu_vv.h new file mode 100644 index 0000000000..0e460326f8 --- /dev/null +++ b/riscv/insns/vmsleu_vv.h @@ -0,0 +1,5 @@ +// vmsleu.vv vd, vs2, vs1 +VI_VV_ULOOP_CMP +({ + res = vs2 <= vs1; +}) diff --git a/riscv/insns/vmsleu_vx.h b/riscv/insns/vmsleu_vx.h new file mode 100644 index 0000000000..935b17681c --- /dev/null +++ 
b/riscv/insns/vmsleu_vx.h @@ -0,0 +1,5 @@ +// vmsleu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 <= rs1; +}) diff --git a/riscv/insns/vmslt_vv.h b/riscv/insns/vmslt_vv.h new file mode 100644 index 0000000000..71e6f87f1f --- /dev/null +++ b/riscv/insns/vmslt_vv.h @@ -0,0 +1,5 @@ +// vmslt.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 < vs1; +}) diff --git a/riscv/insns/vmslt_vx.h b/riscv/insns/vmslt_vx.h new file mode 100644 index 0000000000..b32bb14537 --- /dev/null +++ b/riscv/insns/vmslt_vx.h @@ -0,0 +1,5 @@ +// vmslt.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 < rs1; +}) diff --git a/riscv/insns/vmsltu_vv.h b/riscv/insns/vmsltu_vv.h new file mode 100644 index 0000000000..53a570ae9d --- /dev/null +++ b/riscv/insns/vmsltu_vv.h @@ -0,0 +1,5 @@ +// vmsltu.vv vd, vs2, vs1 +VI_VV_ULOOP_CMP +({ + res = vs2 < vs1; +}) diff --git a/riscv/insns/vmsltu_vx.h b/riscv/insns/vmsltu_vx.h new file mode 100644 index 0000000000..8082544876 --- /dev/null +++ b/riscv/insns/vmsltu_vx.h @@ -0,0 +1,5 @@ +// vmsltu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 < rs1; +}) diff --git a/riscv/insns/vmsne_vi.h b/riscv/insns/vmsne_vi.h new file mode 100644 index 0000000000..5e9758ef94 --- /dev/null +++ b/riscv/insns/vmsne_vi.h @@ -0,0 +1,5 @@ +// vmsne.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 != simm5; +}) diff --git a/riscv/insns/vmsne_vv.h b/riscv/insns/vmsne_vv.h new file mode 100644 index 0000000000..e6a7174a48 --- /dev/null +++ b/riscv/insns/vmsne_vv.h @@ -0,0 +1,5 @@ +// vmsne.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 != vs1; +}) diff --git a/riscv/insns/vmsne_vx.h b/riscv/insns/vmsne_vx.h new file mode 100644 index 0000000000..9e4c155387 --- /dev/null +++ b/riscv/insns/vmsne_vx.h @@ -0,0 +1,5 @@ +// vmsne.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 != rs1; +}) diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h new file mode 100644 index 0000000000..5ef0bfd4ac --- /dev/null +++ b/riscv/insns/vmsof_m.h @@ -0,0 +1,30 @@ +// vmsof.m vd, vs2, vm 
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart ; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + uint64_t &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmul_vv.h b/riscv/insns/vmul_vv.h new file mode 100644 index 0000000000..a3278171dd --- /dev/null +++ b/riscv/insns/vmul_vv.h @@ -0,0 +1,5 @@ +// vmul vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs2 * vs1; +}) diff --git a/riscv/insns/vmul_vx.h b/riscv/insns/vmul_vx.h new file mode 100644 index 0000000000..8d68390276 --- /dev/null +++ b/riscv/insns/vmul_vx.h @@ -0,0 +1,5 @@ +// vmul vd, vs2, rs1 +VI_VX_LOOP +({ + vd = vs2 * rs1; +}) diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h new file mode 100644 index 0000000000..e861a3397a --- /dev/null +++ b/riscv/insns/vmulh_vv.h @@ -0,0 +1,5 @@ +// vmulh vd, vs2, vs1 +VI_VV_LOOP +({ + vd = ((int128_t)vs2 * vs1) >> sew; +}) diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h new file mode 100644 index 0000000000..b6b5503674 --- /dev/null +++ b/riscv/insns/vmulh_vx.h @@ -0,0 +1,5 @@ +// vmulh vd, vs2, rs1 +VI_VX_LOOP +({ + vd = ((int128_t)vs2 * rs1) >> sew; +}) diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h new file mode 100644 index 0000000000..f77a7d3f21 --- /dev/null +++ b/riscv/insns/vmulhsu_vv.h @@ -0,0 +1,38 @@ +// vmulhsu.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +switch(sew) { +case e8: { + auto &vd = 
P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew; + break; +} +case e16: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew; + break; +} +case e32: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew; + break; +} +default: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; + break; +} +} +VI_LOOP_END diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h new file mode 100644 index 0000000000..b0699f6f93 --- /dev/null +++ b/riscv/insns/vmulhsu_vx.h @@ -0,0 +1,38 @@ +// vmulhsu.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +switch(sew) { +case e8: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint8_t rs1 = RS1; + + vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew; + break; +} +case e16: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint16_t rs1 = RS1; + + vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew; + break; +} +case e32: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint32_t rs1 = RS1; + + vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew; + break; +} +default: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint64_t rs1 = RS1; + + vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; + break; +} +} +VI_LOOP_END diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h new file mode 100644 index 0000000000..8e318edb75 --- /dev/null +++ b/riscv/insns/vmulhu_vv.h @@ -0,0 +1,5 @@ +// vmulhu vd ,vs2, vs1 +VI_VV_ULOOP +({ + vd = ((uint128_t)vs2 * vs1) >> sew; +}) diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h new 
file mode 100644 index 0000000000..672ad32df2 --- /dev/null +++ b/riscv/insns/vmulhu_vx.h @@ -0,0 +1,5 @@ +// vmulhu vd ,vs2, rs1 +VI_VX_ULOOP +({ + vd = ((uint128_t)vs2 * rs1) >> sew; +}) diff --git a/riscv/insns/vmv1r_v.h b/riscv/insns/vmv1r_v.h new file mode 100644 index 0000000000..bbdeab9a1d --- /dev/null +++ b/riscv/insns/vmv1r_v.h @@ -0,0 +1,2 @@ +// vmv1r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv2r_v.h b/riscv/insns/vmv2r_v.h new file mode 100644 index 0000000000..1ac8e09eb0 --- /dev/null +++ b/riscv/insns/vmv2r_v.h @@ -0,0 +1,2 @@ +// vmv2r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv4r_v.h b/riscv/insns/vmv4r_v.h new file mode 100644 index 0000000000..2068731a9e --- /dev/null +++ b/riscv/insns/vmv4r_v.h @@ -0,0 +1,2 @@ +// vmv4r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv8r_v.h b/riscv/insns/vmv8r_v.h new file mode 100644 index 0000000000..2b205fc79e --- /dev/null +++ b/riscv/insns/vmv8r_v.h @@ -0,0 +1,2 @@ +// vmv8r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h new file mode 100644 index 0000000000..0e6a13e56a --- /dev/null +++ b/riscv/insns/vmv_s_x.h @@ -0,0 +1,29 @@ +// vmv_s_x: vd[0] = rs1 +require_vector(true); +require(insn.v_vm() == 1); +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +reg_t vl = P.VU.vl; + +if (vl > 0 && P.VU.vstart < vl) { + reg_t rd_num = insn.rd(); + reg_t sew = P.VU.vsew; + + switch(sew) { + case e8: + P.VU.elt(rd_num, 0, true) = RS1; + break; + case e16: + P.VU.elt(rd_num, 0, true) = RS1; + break; + case e32: + P.VU.elt(rd_num, 0, true) = RS1; + break; + default: + P.VU.elt(rd_num, 0, true) = RS1; + break; + } + + vl = 0; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h new file mode 100644 index 0000000000..a760779107 --- /dev/null +++ b/riscv/insns/vmv_v_i.h @@ -0,0 +1,7 @@ +// vmv.v.i vd, simm5 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + vd = simm5; +}) diff --git 
a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h new file mode 100644 index 0000000000..d7f47d0882 --- /dev/null +++ b/riscv/insns/vmv_v_v.h @@ -0,0 +1,7 @@ +// vmv.v.v vd, vs1 +require_vector(true); +VI_CHECK_SSS(true); +VI_VVXI_MERGE_LOOP +({ + vd = vs1; +}) diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h new file mode 100644 index 0000000000..fa7c920be1 --- /dev/null +++ b/riscv/insns/vmv_v_x.h @@ -0,0 +1,7 @@ +// vmv.v.x vd, rs1 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + vd = rs1; +}) diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h new file mode 100644 index 0000000000..2c03e43e8c --- /dev/null +++ b/riscv/insns/vmv_x_s.h @@ -0,0 +1,31 @@ +// vmv_x_s: rd = vs2[rs1] +require_vector(true); +require(insn.v_vm() == 1); +uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen()); +reg_t rs1 = RS1; +reg_t sew = P.VU.vsew; +reg_t rs2_num = insn.rs2(); + +if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) { + WRITE_RD(0); +} else { + switch(sew) { + case e8: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e16: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e32: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e64: + if (P.get_max_xlen() <= sew) + WRITE_RD(P.VU.elt(rs2_num, rs1) & xmask); + else + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h new file mode 100644 index 0000000000..96f0074ce1 --- /dev/null +++ b/riscv/insns/vmvnfr_v.h @@ -0,0 +1,27 @@ +// vmv{1,2,4,8}r.v vd, vs2 (shared body; register count is insn.rs1() + 1) +require_vector_novtype(true, true); +const reg_t baseAddr = RS1; +const reg_t vd = insn.rd(); +const reg_t vs2 = insn.rs2(); +const reg_t len = insn.rs1() + 1; +require_align(vd, len); +require_align(vs2, len); +const reg_t size = len * P.VU.vlenb; + +//register needs one-by-one copy to keep commitlog correct +if (vd != vs2 && P.VU.vstart < size) { + reg_t i = P.VU.vstart / P.VU.vlenb; + reg_t off = P.VU.vstart % P.VU.vlenb; + if (off) { + memcpy(&P.VU.elt(vd + i, 
off, true), + &P.VU.elt(vs2 + i, off), P.VU.vlenb - off); + i++; + } + + for (; i < len; ++i) { + memcpy(&P.VU.elt(vd + i, 0, true), + &P.VU.elt(vs2 + i, 0), P.VU.vlenb); + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h new file mode 100644 index 0000000000..0736d5b21e --- /dev/null +++ b/riscv/insns/vmxnor_mm.h @@ -0,0 +1,2 @@ +// vmxnor.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 ^ vs1)); diff --git a/riscv/insns/vmxor_mm.h b/riscv/insns/vmxor_mm.h new file mode 100644 index 0000000000..7f0c576e37 --- /dev/null +++ b/riscv/insns/vmxor_mm.h @@ -0,0 +1,2 @@ +// vmxor.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 ^ vs1); diff --git a/riscv/insns/vnclip_wi.h b/riscv/insns/vnclip_wi.h new file mode 100644 index 0000000000..1647212392 --- /dev/null +++ b/riscv/insns/vnclip_wi.h @@ -0,0 +1,25 @@ +// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> simm) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + } else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclip_wv.h b/riscv/insns/vnclip_wv.h new file mode 100644 index 0000000000..d87a3378e7 --- /dev/null +++ b/riscv/insns/vnclip_wv.h @@ -0,0 +1,25 @@ +// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i]) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = vs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + } 
else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, true) diff --git a/riscv/insns/vnclip_wx.h b/riscv/insns/vnclip_wx.h new file mode 100644 index 0000000000..9dbfcd7084 --- /dev/null +++ b/riscv/insns/vnclip_wx.h @@ -0,0 +1,25 @@ +// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> rs1[i]) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = rs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + } else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclipu_wi.h b/riscv/insns/vnclipu_wi.h new file mode 100644 index 0000000000..8e4e0dad27 --- /dev/null +++ b/riscv/insns/vnclipu_wi.h @@ -0,0 +1,23 @@ +// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> simm) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + // unsigned shifting to rs1 + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclipu_wv.h b/riscv/insns/vnclipu_wv.h new file mode 100644 index 0000000000..f045964f71 --- /dev/null +++ b/riscv/insns/vnclipu_wv.h @@ -0,0 +1,22 @@ +// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i]) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = vs1 & ((sew * 2) - 1); + + // rounding + 
INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, true) diff --git a/riscv/insns/vnclipu_wx.h b/riscv/insns/vnclipu_wx.h new file mode 100644 index 0000000000..d5155c11b3 --- /dev/null +++ b/riscv/insns/vnclipu_wx.h @@ -0,0 +1,22 @@ +// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> rs1[i]) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = rs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnmsac_vv.h b/riscv/insns/vnmsac_vv.h new file mode 100644 index 0000000000..7c10f29af7 --- /dev/null +++ b/riscv/insns/vnmsac_vv.h @@ -0,0 +1,5 @@ +// vnmsac.vv: vd[i] = -(vs1[i] * vs2[i]) + vd[i] +VI_VV_LOOP +({ + vd = -(vs1 * vs2) + vd; +}) diff --git a/riscv/insns/vnmsac_vx.h b/riscv/insns/vnmsac_vx.h new file mode 100644 index 0000000000..44920be4b2 --- /dev/null +++ b/riscv/insns/vnmsac_vx.h @@ -0,0 +1,5 @@ +// vnmsac.vx: vd[i] = -(x[rs1] * vs2[i]) + vd[i] +VI_VX_LOOP +({ + vd = -(rs1 * vs2) + vd; +}) diff --git a/riscv/insns/vnmsub_vv.h b/riscv/insns/vnmsub_vv.h new file mode 100644 index 0000000000..37f82286c4 --- /dev/null +++ b/riscv/insns/vnmsub_vv.h @@ -0,0 +1,5 @@ +// vnmsub.vv: vd[i] = -(vd[i] * vs1[i]) + vs2[i] +VI_VV_LOOP +({ + vd = -(vd * vs1) + vs2; +}) diff --git a/riscv/insns/vnmsub_vx.h b/riscv/insns/vnmsub_vx.h new file mode 100644 index 0000000000..2e00d22e4a --- /dev/null +++ b/riscv/insns/vnmsub_vx.h @@ -0,0 +1,5 @@ +// vnmsub.vx: vd[i] = -(vd[i] * x[rs1]) + vs2[i] +VI_VX_LOOP +({ + vd = -(vd * rs1) + vs2; +}) diff --git a/riscv/insns/vnsra_wi.h b/riscv/insns/vnsra_wi.h new file mode 100644 
index 0000000000..f41979edff --- /dev/null +++ b/riscv/insns/vnsra_wi.h @@ -0,0 +1,5 @@ +// vnsra.vi vd, vs2, zimm5 +VI_VI_LOOP_NSHIFT +({ + vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f); +}, false) diff --git a/riscv/insns/vnsra_wv.h b/riscv/insns/vnsra_wv.h new file mode 100644 index 0000000000..59f255ef30 --- /dev/null +++ b/riscv/insns/vnsra_wv.h @@ -0,0 +1,5 @@ +// vnsra.vv vd, vs2, vs1 +VI_VV_LOOP_NSHIFT +({ + vd = vs2 >> (vs1 & (sew * 2 - 1)); +}, true) diff --git a/riscv/insns/vnsra_wx.h b/riscv/insns/vnsra_wx.h new file mode 100644 index 0000000000..adaa24c384 --- /dev/null +++ b/riscv/insns/vnsra_wx.h @@ -0,0 +1,5 @@ +// vnsra.vx vd, vs2, rs1 +VI_VX_LOOP_NSHIFT +({ + vd = vs2 >> (rs1 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vnsrl_wi.h b/riscv/insns/vnsrl_wi.h new file mode 100644 index 0000000000..91402c0c2a --- /dev/null +++ b/riscv/insns/vnsrl_wi.h @@ -0,0 +1,5 @@ +// vnsrl.vi vd, vs2, zimm5 +VI_VI_LOOP_NSHIFT +({ + vd = vs2_u >> (zimm5 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vnsrl_wv.h b/riscv/insns/vnsrl_wv.h new file mode 100644 index 0000000000..609299faf8 --- /dev/null +++ b/riscv/insns/vnsrl_wv.h @@ -0,0 +1,5 @@ +// vnsrl.vv vd, vs2, vs1 +VI_VV_LOOP_NSHIFT +({ + vd = vs2_u >> (vs1 & (sew * 2 - 1)); +}, true) diff --git a/riscv/insns/vnsrl_wx.h b/riscv/insns/vnsrl_wx.h new file mode 100644 index 0000000000..8356a2bd77 --- /dev/null +++ b/riscv/insns/vnsrl_wx.h @@ -0,0 +1,5 @@ +// vnsrl.vx vd, vs2, rs1 +VI_VX_LOOP_NSHIFT +({ + vd = vs2_u >> (rs1 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vor_vi.h b/riscv/insns/vor_vi.h new file mode 100644 index 0000000000..f759607497 --- /dev/null +++ b/riscv/insns/vor_vi.h @@ -0,0 +1,5 @@ +// vor +VI_VI_LOOP +({ + vd = simm5 | vs2; +}) diff --git a/riscv/insns/vor_vv.h b/riscv/insns/vor_vv.h new file mode 100644 index 0000000000..0c460662bf --- /dev/null +++ b/riscv/insns/vor_vv.h @@ -0,0 +1,5 @@ +// vor +VI_VV_LOOP +({ + vd = vs1 | vs2; +}) diff --git a/riscv/insns/vor_vx.h 
b/riscv/insns/vor_vx.h new file mode 100644 index 0000000000..01c003ab35 --- /dev/null +++ b/riscv/insns/vor_vx.h @@ -0,0 +1,5 @@ +// vor +VI_VX_LOOP +({ + vd = rs1 | vs2; +}) diff --git a/riscv/insns/vpopc_m.h b/riscv/insns/vpopc_m.h new file mode 100644 index 0000000000..c204b2c0e2 --- /dev/null +++ b/riscv/insns/vpopc_m.h @@ -0,0 +1,23 @@ +// vmpopc rd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +reg_t popcount = 0; +for (reg_t i=P.VU.vstart; i(rs2_num, midx ) >> mpos) & 0x1) == 1; + if (insn.v_vm() == 1) { + popcount += vs2_lsb; + } else { + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + popcount += (vs2_lsb && do_mask); + } +} +P.VU.vstart = 0; +WRITE_RD(popcount); diff --git a/riscv/insns/vredand_vs.h b/riscv/insns/vredand_vs.h new file mode 100644 index 0000000000..6c2d9089fa --- /dev/null +++ b/riscv/insns/vredand_vs.h @@ -0,0 +1,5 @@ +// vredand.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res &= vs2; +}) diff --git a/riscv/insns/vredmax_vs.h b/riscv/insns/vredmax_vs.h new file mode 100644 index 0000000000..be2e76ab3a --- /dev/null +++ b/riscv/insns/vredmax_vs.h @@ -0,0 +1,5 @@ +// vredmax.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredmaxu_vs.h b/riscv/insns/vredmaxu_vs.h new file mode 100644 index 0000000000..960f486181 --- /dev/null +++ b/riscv/insns/vredmaxu_vs.h @@ -0,0 +1,5 @@ +// vredmaxu.vs vd, vs2 ,vs1 +VI_VV_ULOOP_REDUCTION +({ + vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredmin_vs.h b/riscv/insns/vredmin_vs.h new file mode 100644 index 0000000000..50359b7a53 --- /dev/null +++ b/riscv/insns/vredmin_vs.h @@ -0,0 +1,5 @@ +// vredmin.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res = (vd_0_res <= vs2) ? 
vd_0_res : vs2; +}) diff --git a/riscv/insns/vredminu_vs.h b/riscv/insns/vredminu_vs.h new file mode 100644 index 0000000000..708247592f --- /dev/null +++ b/riscv/insns/vredminu_vs.h @@ -0,0 +1,5 @@ +// vredminu.vs vd, vs2 ,vs1 +VI_VV_ULOOP_REDUCTION +({ + vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredor_vs.h b/riscv/insns/vredor_vs.h new file mode 100644 index 0000000000..f7acd9aa9d --- /dev/null +++ b/riscv/insns/vredor_vs.h @@ -0,0 +1,5 @@ +// vredor.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res |= vs2; +}) diff --git a/riscv/insns/vredsum_vs.h b/riscv/insns/vredsum_vs.h new file mode 100644 index 0000000000..c4fefe57f6 --- /dev/null +++ b/riscv/insns/vredsum_vs.h @@ -0,0 +1,5 @@ +// vredsum.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vredxor_vs.h b/riscv/insns/vredxor_vs.h new file mode 100644 index 0000000000..bb81ad9a4f --- /dev/null +++ b/riscv/insns/vredxor_vs.h @@ -0,0 +1,5 @@ +// vredxor.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res ^= vs2; +}) diff --git a/riscv/insns/vrem_vv.h b/riscv/insns/vrem_vv.h new file mode 100644 index 0000000000..260716a0eb --- /dev/null +++ b/riscv/insns/vrem_vv.h @@ -0,0 +1,11 @@ +// vrem.vv vd, vs2, vs1 +VI_VV_LOOP +({ + if (vs1 == 0) + vd = vs2; + else if(vs2 == -(((intmax_t)1) << (sew - 1)) && vs1 == -1) + vd = 0; + else { + vd = vs2 % vs1; + } +}) diff --git a/riscv/insns/vrem_vx.h b/riscv/insns/vrem_vx.h new file mode 100644 index 0000000000..3702f02f41 --- /dev/null +++ b/riscv/insns/vrem_vx.h @@ -0,0 +1,10 @@ +// vrem.vx vd, vs2, rs1 +VI_VX_LOOP +({ + if (rs1 == 0) + vd = vs2; + else if (vs2 == -(((intmax_t)1) << (sew - 1)) && rs1 == -1) + vd = 0; + else + vd = vs2 % rs1; +}) diff --git a/riscv/insns/vremu_vv.h b/riscv/insns/vremu_vv.h new file mode 100644 index 0000000000..7e1507235a --- /dev/null +++ b/riscv/insns/vremu_vv.h @@ -0,0 +1,8 @@ +// vremu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + if (vs1 == 0) + vd = vs2; + else + vd = 
vs2 % vs1; +}) diff --git a/riscv/insns/vremu_vx.h b/riscv/insns/vremu_vx.h new file mode 100644 index 0000000000..a87a8200a8 --- /dev/null +++ b/riscv/insns/vremu_vx.h @@ -0,0 +1,8 @@ +// vremu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + if (rs1 == 0) + vd = vs2; + else + vd = vs2 % rs1; +}) diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h new file mode 100644 index 0000000000..385e9be973 --- /dev/null +++ b/riscv/insns/vrgather_vi.h @@ -0,0 +1,30 @@ +// vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_vm; + +reg_t zimm5 = insn.v_zimm5(); + +VI_LOOP_BASE + +for (reg_t i = P.VU.vstart; i < vl; ++i) { + VI_LOOP_ELEMENT_SKIP(); + + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + case e16: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + case e32: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + default: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + } +} + +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h new file mode 100644 index 0000000000..a3a32f560f --- /dev/null +++ b/riscv/insns/vrgather_vv.h @@ -0,0 +1,32 @@ +// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require_align(insn.rs1(), P.VU.vflmul); +require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1()); +require_vm; + +VI_LOOP_BASE + switch (sew) { + case e8: { + auto vs1 = P.VU.elt(rs1_num, i); + //if (i > 255) continue; + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 
0 : P.VU.elt(rs2_num, vs1); + break; + } + case e16: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e32: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + default: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + } +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h new file mode 100644 index 0000000000..058ffae104 --- /dev/null +++ b/riscv/insns/vrgather_vx.h @@ -0,0 +1,24 @@ +// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_vm; + +reg_t rs1 = RS1; + +VI_LOOP_BASE + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + case e16: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + case e32: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + default: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + } +VI_LOOP_END; diff --git a/riscv/insns/vrgatherei16_vv.h b/riscv/insns/vrgatherei16_vv.h new file mode 100644 index 0000000000..3bb166a237 --- /dev/null +++ b/riscv/insns/vrgatherei16_vv.h @@ -0,0 +1,34 @@ +// vrgatherei16.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; +float vemul = (16.0 / P.VU.vsew * P.VU.vflmul); +require(vemul >= 0.125 && vemul <= 8); +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require_align(insn.rs1(), vemul); +require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), vemul); +require(insn.rd() != insn.rs2()); +require_vm; + +VI_LOOP_BASE + switch (sew) { + case e8: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e16: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e32: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + default: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + } +VI_LOOP_END; diff --git a/riscv/insns/vrsub_vi.h b/riscv/insns/vrsub_vi.h new file mode 100644 index 0000000000..198c33f927 --- /dev/null +++ b/riscv/insns/vrsub_vi.h @@ -0,0 +1,5 @@ +// vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i] +VI_VI_LOOP +({ + vd = simm5 - vs2; +}) diff --git a/riscv/insns/vrsub_vx.h b/riscv/insns/vrsub_vx.h new file mode 100644 index 0000000000..bfd62594db --- /dev/null +++ b/riscv/insns/vrsub_vx.h @@ -0,0 +1,5 @@ +// vrsub.vx vd, vs2, rs1, vm # vd[i] = rs1 - vs2[i] +VI_VX_LOOP +({ + vd = rs1 - vs2; +}) diff --git a/riscv/insns/vs1r_v.h b/riscv/insns/vs1r_v.h new file mode 100644 index 0000000000..1932ec0b88 --- /dev/null +++ b/riscv/insns/vs1r_v.h @@ -0,0 +1,2 @@ +// vs1r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs2r_v.h b/riscv/insns/vs2r_v.h new file mode 100644 index 0000000000..2e515b476c --- /dev/null +++ b/riscv/insns/vs2r_v.h @@ -0,0 +1,2 @@ +// vs2r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs4r_v.h b/riscv/insns/vs4r_v.h new file mode 100644 index 0000000000..161bf89a31 --- /dev/null +++ 
b/riscv/insns/vs4r_v.h @@ -0,0 +1,2 @@ +// vs4r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs8r_v.h b/riscv/insns/vs8r_v.h new file mode 100644 index 0000000000..1ad2575638 --- /dev/null +++ b/riscv/insns/vs8r_v.h @@ -0,0 +1,2 @@ +// vs8r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h new file mode 100644 index 0000000000..c361f087f7 --- /dev/null +++ b/riscv/insns/vsadd_vi.h @@ -0,0 +1,28 @@ +// vsadd.vi vd, vs2 simm5 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VI_PARAMS(e8); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +case e16: { + VI_PARAMS(e16); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +case e32: { + VI_PARAMS(e32); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +default: { + VI_PARAMS(e64); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h new file mode 100644 index 0000000000..ce0ef4071b --- /dev/null +++ b/riscv/insns/vsadd_vv.h @@ -0,0 +1,29 @@ +// vsadd.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VV_PARAMS(e8); + vd = sat_add(vs2, vs1, sat); + break; +} +case e16: { + VV_PARAMS(e16); + vd = sat_add(vs2, vs1, sat); + break; +} +case e32: { + VV_PARAMS(e32); + vd = sat_add(vs2, vs1, sat); + break; +} +default: { + VV_PARAMS(e64); + vd = sat_add(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END + diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h new file mode 100644 index 0000000000..691f017fff --- /dev/null +++ b/riscv/insns/vsadd_vx.h @@ -0,0 +1,28 @@ +// vsadd.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VX_PARAMS(e8); + vd = sat_add(vs2, rs1, sat); + break; +} +case e16: { + VX_PARAMS(e16); + vd = sat_add(vs2, rs1, sat); + break; +} +case e32: { + VX_PARAMS(e32); + vd = sat_add(vs2, rs1, sat); + 
break; +} +default: { + VX_PARAMS(e64); + vd = sat_add(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsaddu_vi.h b/riscv/insns/vsaddu_vi.h new file mode 100644 index 0000000000..7a200dff74 --- /dev/null +++ b/riscv/insns/vsaddu_vi.h @@ -0,0 +1,11 @@ +// vsaddu vd, vs2, zimm5 +VI_VI_ULOOP +({ + bool sat = false; + vd = vs2 + (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; +}) diff --git a/riscv/insns/vsaddu_vv.h b/riscv/insns/vsaddu_vv.h new file mode 100644 index 0000000000..e5d7025f05 --- /dev/null +++ b/riscv/insns/vsaddu_vv.h @@ -0,0 +1,11 @@ +// vsaddu vd, vs2, vs1 +VI_VV_ULOOP +({ + bool sat = false; + vd = vs2 + vs1; + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; +}) diff --git a/riscv/insns/vsaddu_vx.h b/riscv/insns/vsaddu_vx.h new file mode 100644 index 0000000000..46ec29d9b7 --- /dev/null +++ b/riscv/insns/vsaddu_vx.h @@ -0,0 +1,12 @@ +// vsaddu vd, vs2, rs1 +VI_VX_ULOOP +({ + bool sat = false; + vd = vs2 + rs1; + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; + +}) diff --git a/riscv/insns/vsbc_vvm.h b/riscv/insns/vsbc_vvm.h new file mode 100644 index 0000000000..96b8bb8041 --- /dev/null +++ b/riscv/insns/vsbc_vvm.h @@ -0,0 +1,10 @@ +// vsbc.vvm vd, vs2, rs1, v0 +VI_VV_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs2) - (op_mask & vs1) - carry; + vd = res; +}) diff --git a/riscv/insns/vsbc_vxm.h b/riscv/insns/vsbc_vxm.h new file mode 100644 index 0000000000..c6f9ca82a2 --- /dev/null +++ b/riscv/insns/vsbc_vxm.h @@ -0,0 +1,10 @@ +// vsbc.vxm vd, vs2, rs1, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs2) - (op_mask & rs1) - carry; + vd = res; 
+}) diff --git a/riscv/insns/vse16_v.h b/riscv/insns/vse16_v.h new file mode 100644 index 0000000000..20b04c869b --- /dev/null +++ b/riscv/insns/vse16_v.h @@ -0,0 +1,2 @@ +// vse16.v and vsseg[2-8]e16.v +VI_ST(0, (i * nf + fn), uint16); diff --git a/riscv/insns/vse32_v.h b/riscv/insns/vse32_v.h new file mode 100644 index 0000000000..efd2973d07 --- /dev/null +++ b/riscv/insns/vse32_v.h @@ -0,0 +1,2 @@ +// vse32.v and vsseg[2-8]e32.v +VI_ST(0, (i * nf + fn), uint32); diff --git a/riscv/insns/vse64_v.h b/riscv/insns/vse64_v.h new file mode 100644 index 0000000000..9b36c8d8d2 --- /dev/null +++ b/riscv/insns/vse64_v.h @@ -0,0 +1,2 @@ +// vse64.v and vsseg[2-8]e64.v +VI_ST(0, (i * nf + fn), uint64); diff --git a/riscv/insns/vse8_v.h b/riscv/insns/vse8_v.h new file mode 100644 index 0000000000..32dee14b4c --- /dev/null +++ b/riscv/insns/vse8_v.h @@ -0,0 +1,2 @@ +// vse8.v and vsseg[2-8]e8.v +VI_ST(0, (i * nf + fn), uint8); diff --git a/riscv/insns/vsetvl.h b/riscv/insns/vsetvl.h new file mode 100644 index 0000000000..2969edc6ce --- /dev/null +++ b/riscv/insns/vsetvl.h @@ -0,0 +1,2 @@ +require_vector_novtype(false, false); +WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, RS2)); diff --git a/riscv/insns/vsetvli.h b/riscv/insns/vsetvli.h new file mode 100644 index 0000000000..7b1f1d716c --- /dev/null +++ b/riscv/insns/vsetvli.h @@ -0,0 +1,2 @@ +require_vector_novtype(false, false); +WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, insn.v_zimm11())); diff --git a/riscv/insns/vsext_vf2.h b/riscv/insns/vsext_vf2.h new file mode 100644 index 0000000000..16ccfac607 --- /dev/null +++ b/riscv/insns/vsext_vf2.h @@ -0,0 +1 @@ +VI_VV_EXT(2, int); diff --git a/riscv/insns/vsext_vf4.h b/riscv/insns/vsext_vf4.h new file mode 100644 index 0000000000..d4476a310f --- /dev/null +++ b/riscv/insns/vsext_vf4.h @@ -0,0 +1 @@ +VI_VV_EXT(4, int); diff --git a/riscv/insns/vsext_vf8.h b/riscv/insns/vsext_vf8.h new file mode 100644 index 0000000000..09fdc2c75c --- /dev/null +++ 
b/riscv/insns/vsext_vf8.h @@ -0,0 +1 @@ +VI_VV_EXT(8, int); diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h new file mode 100644 index 0000000000..e867722fa8 --- /dev/null +++ b/riscv/insns/vslide1down_vx.h @@ -0,0 +1,44 @@ +//vslide1down.vx vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +VI_LOOP_BASE +if (i != vl - 1) { + switch (sew) { + case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, 1); + vd = vs2; + } + break; + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, 1); + vd = vs2; + } + break; + default: { + VI_XI_SLIDEDOWN_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (sew) { + case e8: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + case e16: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + case e32: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + default: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + } +} +VI_LOOP_END diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h new file mode 100644 index 0000000000..33cb9ed641 --- /dev/null +++ b/riscv/insns/vslide1up_vx.h @@ -0,0 +1,30 @@ +//vslide1up.vx vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +VI_LOOP_BASE +if (i != 0) { + if (sew == e8) { + VI_XI_SLIDEUP_PARAMS(e8, 1); + vd = vs2; + } else if(sew == e16) { + VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } else if(sew == e32) { + VI_XI_SLIDEUP_PARAMS(e32, 1); + vd = vs2; + } else if(sew == e64) { + VI_XI_SLIDEUP_PARAMS(e64, 1); + vd = vs2; + } +} else { + if (sew == e8) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e16) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e32) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e64) { + P.VU.elt(rd_num, 0, true) = RS1; + } +} +VI_LOOP_END diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h new file mode 100644 index 0000000000..bc440cf2bd --- /dev/null +++ b/riscv/insns/vslidedown_vi.h @@ -0,0 +1,36 @@ +// vslidedown.vi vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +const 
reg_t sh = insn.v_zimm5(); +VI_LOOP_BASE + +reg_t offset = 0; +bool is_valid = (i + sh) < P.VU.vlmax; + +if (is_valid) { + offset = sh; +} + +switch (sew) { +case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, offset); + vd = is_valid ? vs2 : 0; +} +break; +default: { + VI_XI_SLIDEDOWN_PARAMS(e64, offset); + vd = is_valid ? vs2 : 0; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h new file mode 100644 index 0000000000..074aa50868 --- /dev/null +++ b/riscv/insns/vslidedown_vx.h @@ -0,0 +1,36 @@ +//vslidedown.vx vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +const uint128_t sh = RS1; +VI_LOOP_BASE + +reg_t offset = 0; +bool is_valid = (i + sh) < P.VU.vlmax; + +if (is_valid) { + offset = sh; +} + +switch (sew) { +case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, offset); + vd = is_valid ? vs2 : 0; +} +break; +default: { + VI_XI_SLIDEDOWN_PARAMS(e64, offset); + vd = is_valid ? 
vs2 : 0; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h new file mode 100644 index 0000000000..42657892af --- /dev/null +++ b/riscv/insns/vslideup_vi.h @@ -0,0 +1,31 @@ +// vslideup.vi vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +const reg_t offset = insn.v_zimm5(); +VI_LOOP_BASE +if (P.VU.vstart < offset && i < offset) + continue; + +switch (sew) { +case e8: { + VI_XI_SLIDEUP_PARAMS(e8, offset); + vd = vs2; +} +break; +case e16: { + VI_XI_SLIDEUP_PARAMS(e16, offset); + vd = vs2; +} +break; +case e32: { + VI_XI_SLIDEUP_PARAMS(e32, offset); + vd = vs2; +} +break; +default: { + VI_XI_SLIDEUP_PARAMS(e64, offset); + vd = vs2; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h new file mode 100644 index 0000000000..720d2ab53b --- /dev/null +++ b/riscv/insns/vslideup_vx.h @@ -0,0 +1,31 @@ +//vslideup.vx vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +const reg_t offset = RS1; +VI_LOOP_BASE +if (P.VU.vstart < offset && i < offset) + continue; + +switch (sew) { +case e8: { + VI_XI_SLIDEUP_PARAMS(e8, offset); + vd = vs2; +} +break; +case e16: { + VI_XI_SLIDEUP_PARAMS(e16, offset); + vd = vs2; +} +break; +case e32: { + VI_XI_SLIDEUP_PARAMS(e32, offset); + vd = vs2; +} +break; +default: { + VI_XI_SLIDEUP_PARAMS(e64, offset); + vd = vs2; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vsll_vi.h b/riscv/insns/vsll_vi.h new file mode 100644 index 0000000000..be4650669f --- /dev/null +++ b/riscv/insns/vsll_vi.h @@ -0,0 +1,5 @@ +// vsll.vi vd, vs2, zimm5 +VI_VI_LOOP +({ + vd = vs2 << (simm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsll_vv.h b/riscv/insns/vsll_vv.h new file mode 100644 index 0000000000..ce82022504 --- /dev/null +++ b/riscv/insns/vsll_vv.h @@ -0,0 +1,5 @@ +// vsll +VI_VV_LOOP +({ + vd = vs2 << (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsll_vx.h b/riscv/insns/vsll_vx.h new file mode 100644 index 0000000000..823510b2c5 --- /dev/null +++ b/riscv/insns/vsll_vx.h @@ -0,0 +1,5 @@ 
+// vsll +VI_VX_LOOP +({ + vd = vs2 << (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h new file mode 100644 index 0000000000..e7ce306e9e --- /dev/null +++ b/riscv/insns/vsmul_vv.h @@ -0,0 +1,32 @@ +// vsmul.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1); + +VI_VV_LOOP +({ + int64_t vs1_sign; + int64_t vs2_sign; + int64_t result_sign; + + vs1_sign = vs1 & sign_mask; + vs2_sign = vs2 & sign_mask; + bool overflow = vs1 == vs2 && vs1 == int_min; + + int128_t result = (int128_t)vs1 * (int128_t)vs2; + result_sign = (vs1_sign ^ vs2_sign) & sign_mask; + + // rounding + INT_ROUNDING(result, xrm, sew - 1); + // remove guard bits + result = result >> (sew - 1); + + // saturation + if (overflow) { + result = int_max; + P.VU.vxsat |= 1; + } + + vd = result; +}) diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h new file mode 100644 index 0000000000..cf4b511423 --- /dev/null +++ b/riscv/insns/vsmul_vx.h @@ -0,0 +1,33 @@ +// vsmul.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1); + +VI_VX_LOOP +({ + int64_t rs1_sign; + int64_t vs2_sign; + int64_t result_sign; + + rs1_sign = rs1 & sign_mask; + vs2_sign = vs2 & sign_mask; + bool overflow = rs1 == vs2 && rs1 == int_min; + + int128_t result = (int128_t)rs1 * (int128_t)vs2; + result_sign = (rs1_sign ^ vs2_sign) & sign_mask; + + // rounding + INT_ROUNDING(result, xrm, sew - 1); + + // remove guard bits + result = result >> (sew - 1); + + // max saturation + if (overflow) { + result = int_max; + P.VU.vxsat |= 1; + } + + vd = result; +}) diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h new file mode 100644 index 0000000000..5c589274ef --- /dev/null +++ b/riscv/insns/vsra_vi.h @@ 
-0,0 +1,5 @@ +// vsra.vi vd, vs2, zimm5 +VI_VI_LOOP +({ + vd = vs2 >> (simm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsra_vv.h b/riscv/insns/vsra_vv.h new file mode 100644 index 0000000000..8889af9c08 --- /dev/null +++ b/riscv/insns/vsra_vv.h @@ -0,0 +1,5 @@ +// vsra.vv vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs2 >> (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsra_vx.h b/riscv/insns/vsra_vx.h new file mode 100644 index 0000000000..c1b0c10728 --- /dev/null +++ b/riscv/insns/vsra_vx.h @@ -0,0 +1,5 @@ +// vsra.vx vd, vs2, rs1 +VI_VX_LOOP +({ + vd = vs2 >> (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsrl_vi.h b/riscv/insns/vsrl_vi.h new file mode 100644 index 0000000000..fe5d272025 --- /dev/null +++ b/riscv/insns/vsrl_vi.h @@ -0,0 +1,5 @@ +// vsrl.vi vd, vs2, zimm5 +VI_VI_ULOOP +({ + vd = vs2 >> (zimm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsrl_vv.h b/riscv/insns/vsrl_vv.h new file mode 100644 index 0000000000..6376af36bc --- /dev/null +++ b/riscv/insns/vsrl_vv.h @@ -0,0 +1,5 @@ +// vsrl.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + vd = vs2 >> (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsrl_vx.h b/riscv/insns/vsrl_vx.h new file mode 100644 index 0000000000..a4f899ca2c --- /dev/null +++ b/riscv/insns/vsrl_vx.h @@ -0,0 +1,5 @@ +// vsrl.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + vd = vs2 >> (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsse16_v.h b/riscv/insns/vsse16_v.h new file mode 100644 index 0000000000..adbbcf5c5d --- /dev/null +++ b/riscv/insns/vsse16_v.h @@ -0,0 +1,2 @@ +// vsse16v and vssseg[2-8]e16.v +VI_ST(i * RS2, fn, uint16); diff --git a/riscv/insns/vsse32_v.h b/riscv/insns/vsse32_v.h new file mode 100644 index 0000000000..73bd272b0a --- /dev/null +++ b/riscv/insns/vsse32_v.h @@ -0,0 +1,2 @@ +// vsse32.v and vssseg[2-8]e32.v +VI_ST(i * RS2, fn, uint32); diff --git a/riscv/insns/vsse64_v.h b/riscv/insns/vsse64_v.h new file mode 100644 index 0000000000..1785a56877 --- /dev/null +++ b/riscv/insns/vsse64_v.h @@ -0,0 +1,2 @@ +// vsse64.v and 
vssseg[2-8]e64.v +VI_ST(i * RS2, fn, uint64); diff --git a/riscv/insns/vsse8_v.h b/riscv/insns/vsse8_v.h new file mode 100644 index 0000000000..c5daf0bce8 --- /dev/null +++ b/riscv/insns/vsse8_v.h @@ -0,0 +1,2 @@ +// vsse8.v and vssseg[2-8]e8.v +VI_ST(i * RS2, fn, uint8); diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h new file mode 100644 index 0000000000..ff2e1c585c --- /dev/null +++ b/riscv/insns/vssra_vi.h @@ -0,0 +1,10 @@ +// vssra.vi vd, vs2, simm5 +VRM xrm = P.VU.get_vround_mode(); +VI_VI_LOOP +({ + int sh = simm5 & (sew - 1) & 0x1f; + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h new file mode 100644 index 0000000000..7bbc766ff1 --- /dev/null +++ b/riscv/insns/vssra_vv.h @@ -0,0 +1,10 @@ +// vssra.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VV_LOOP +({ + int sh = vs1 & (sew - 1); + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h new file mode 100644 index 0000000000..068a22b692 --- /dev/null +++ b/riscv/insns/vssra_vx.h @@ -0,0 +1,10 @@ +// vssra.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VX_LOOP +({ + int sh = rs1 & (sew - 1); + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h new file mode 100644 index 0000000000..d125164d6a --- /dev/null +++ b/riscv/insns/vssrl_vi.h @@ -0,0 +1,10 @@ +// vssrl.vi vd, vs2, zimm5 +VRM xrm = P.VU.get_vround_mode(); +VI_VI_ULOOP +({ + int sh = zimm5 & (sew - 1) & 0x1f; + uint128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h new file mode 100644 index 0000000000..a8e5d16423 --- /dev/null +++ b/riscv/insns/vssrl_vv.h @@ -0,0 +1,10 @@ +// vssrl.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VV_ULOOP +({ + int sh = vs1 & (sew - 1); + uint128_t val = 
vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h new file mode 100644 index 0000000000..ee3cb3462f --- /dev/null +++ b/riscv/insns/vssrl_vx.h @@ -0,0 +1,10 @@ +// vssrl.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VX_ULOOP +({ + int sh = rs1 & (sew - 1); + uint128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h new file mode 100644 index 0000000000..18fe4fb5f0 --- /dev/null +++ b/riscv/insns/vssub_vv.h @@ -0,0 +1,29 @@ +// vssub.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VV_PARAMS(e8); + vd = sat_sub(vs2, vs1, sat); + break; +} +case e16: { + VV_PARAMS(e16); + vd = sat_sub(vs2, vs1, sat); + break; +} +case e32: { + VV_PARAMS(e32); + vd = sat_sub(vs2, vs1, sat); + break; +} +default: { + VV_PARAMS(e64); + vd = sat_sub(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h new file mode 100644 index 0000000000..7a01125644 --- /dev/null +++ b/riscv/insns/vssub_vx.h @@ -0,0 +1,29 @@ +// vssub.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VX_PARAMS(e8); + vd = sat_sub(vs2, rs1, sat); + break; +} +case e16: { + VX_PARAMS(e16); + vd = sat_sub(vs2, rs1, sat); + break; +} +case e32: { + VX_PARAMS(e32); + vd = sat_sub(vs2, rs1, sat); + break; +} +default: { + VX_PARAMS(e64); + vd = sat_sub(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h new file mode 100644 index 0000000000..e58076ebe4 --- /dev/null +++ b/riscv/insns/vssubu_vv.h @@ -0,0 +1,30 @@ +// vssubu.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VV_U_PARAMS(e8); + vd = sat_subu(vs2, vs1, sat); + break; +} +case e16: { + VV_U_PARAMS(e16); + vd = 
sat_subu(vs2, vs1, sat); + break; +} +case e32: { + VV_U_PARAMS(e32); + vd = sat_subu(vs2, vs1, sat); + break; +} +default: { + VV_U_PARAMS(e64); + vd = sat_subu(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; + +VI_LOOP_END diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h new file mode 100644 index 0000000000..556c759f59 --- /dev/null +++ b/riscv/insns/vssubu_vx.h @@ -0,0 +1,29 @@ +// vssubu.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VX_U_PARAMS(e8); + vd = sat_subu(vs2, rs1, sat); + break; +} +case e16: { + VX_U_PARAMS(e16); + vd = sat_subu(vs2, rs1, sat); + break; +} +case e32: { + VX_U_PARAMS(e32); + vd = sat_subu(vs2, rs1, sat); + break; +} +default: { + VX_U_PARAMS(e64); + vd = sat_subu(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsub_vv.h b/riscv/insns/vsub_vv.h new file mode 100644 index 0000000000..7d119d50fd --- /dev/null +++ b/riscv/insns/vsub_vv.h @@ -0,0 +1,5 @@ +// vsub +VI_VV_LOOP +({ + vd = vs2 - vs1; +}) diff --git a/riscv/insns/vsub_vx.h b/riscv/insns/vsub_vx.h new file mode 100644 index 0000000000..e075b42370 --- /dev/null +++ b/riscv/insns/vsub_vx.h @@ -0,0 +1,5 @@ +// vsub: vd[i] = vs2[i] - x[rs1] +VI_VX_LOOP +({ + vd = vs2 - rs1; +}) diff --git a/riscv/insns/vsuxei16_v.h b/riscv/insns/vsuxei16_v.h new file mode 100644 index 0000000000..f361c03f81 --- /dev/null +++ b/riscv/insns/vsuxei16_v.h @@ -0,0 +1,2 @@ +// vsuxe16.v +VI_ST_INDEX(e16, false); diff --git a/riscv/insns/vsuxei32_v.h b/riscv/insns/vsuxei32_v.h new file mode 100644 index 0000000000..c1c8dc7c51 --- /dev/null +++ b/riscv/insns/vsuxei32_v.h @@ -0,0 +1,2 @@ +// vsuxe32.v +VI_ST_INDEX(e32, false); diff --git a/riscv/insns/vsuxei64_v.h b/riscv/insns/vsuxei64_v.h new file mode 100644 index 0000000000..0c619cf1d8 --- /dev/null +++ b/riscv/insns/vsuxei64_v.h @@ -0,0 +1,2 @@ +// vsuxe64.v +VI_ST_INDEX(e64, false); diff --git a/riscv/insns/vsuxei8_v.h 
b/riscv/insns/vsuxei8_v.h new file mode 100644 index 0000000000..36d2a11c01 --- /dev/null +++ b/riscv/insns/vsuxei8_v.h @@ -0,0 +1,2 @@ +// vsuxe8.v +VI_ST_INDEX(e8, false); diff --git a/riscv/insns/vsxei16_v.h b/riscv/insns/vsxei16_v.h new file mode 100644 index 0000000000..42c3c78dbb --- /dev/null +++ b/riscv/insns/vsxei16_v.h @@ -0,0 +1,2 @@ +// vsxei16.v and vsxseg[2-8]ei16.v +VI_ST_INDEX(e16, true); diff --git a/riscv/insns/vsxei32_v.h b/riscv/insns/vsxei32_v.h new file mode 100644 index 0000000000..f0aed6bd6e --- /dev/null +++ b/riscv/insns/vsxei32_v.h @@ -0,0 +1,2 @@ +// vsxei32.v and vsxseg[2-8]ei32.v +VI_ST_INDEX(e32, true); diff --git a/riscv/insns/vsxei64_v.h b/riscv/insns/vsxei64_v.h new file mode 100644 index 0000000000..88ddaf3fd7 --- /dev/null +++ b/riscv/insns/vsxei64_v.h @@ -0,0 +1,2 @@ +// vsxei64.v and vsxseg[2-8]ei64.v +VI_ST_INDEX(e64, true); diff --git a/riscv/insns/vsxei8_v.h b/riscv/insns/vsxei8_v.h new file mode 100644 index 0000000000..621512c50c --- /dev/null +++ b/riscv/insns/vsxei8_v.h @@ -0,0 +1,2 @@ +// vsxei8.v and vsxseg[2-8]ei8.v +VI_ST_INDEX(e8, true); diff --git a/riscv/insns/vwadd_vv.h b/riscv/insns/vwadd_vv.h new file mode 100644 index 0000000000..df4a13534d --- /dev/null +++ b/riscv/insns/vwadd_vv.h @@ -0,0 +1,6 @@ +// vwadd.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, int); +}) diff --git a/riscv/insns/vwadd_vx.h b/riscv/insns/vwadd_vx.h new file mode 100644 index 0000000000..c226389342 --- /dev/null +++ b/riscv/insns/vwadd_vx.h @@ -0,0 +1,6 @@ +// vwadd.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, int); +}) diff --git a/riscv/insns/vwadd_wv.h b/riscv/insns/vwadd_wv.h new file mode 100644 index 0000000000..54d2ba4072 --- /dev/null +++ b/riscv/insns/vwadd_wv.h @@ -0,0 +1,6 @@ +// vwadd.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, +, int); +}) diff --git a/riscv/insns/vwadd_wx.h 
b/riscv/insns/vwadd_wx.h new file mode 100644 index 0000000000..bb4cee5100 --- /dev/null +++ b/riscv/insns/vwadd_wx.h @@ -0,0 +1,6 @@ +// vwadd.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, +, int); +}) diff --git a/riscv/insns/vwaddu_vv.h b/riscv/insns/vwaddu_vv.h new file mode 100644 index 0000000000..286ebc858e --- /dev/null +++ b/riscv/insns/vwaddu_vv.h @@ -0,0 +1,6 @@ +// vwaddu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, uint); +}) diff --git a/riscv/insns/vwaddu_vx.h b/riscv/insns/vwaddu_vx.h new file mode 100644 index 0000000000..61cddfc897 --- /dev/null +++ b/riscv/insns/vwaddu_vx.h @@ -0,0 +1,6 @@ +// vwaddu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, uint); +}) diff --git a/riscv/insns/vwaddu_wv.h b/riscv/insns/vwaddu_wv.h new file mode 100644 index 0000000000..fee813657e --- /dev/null +++ b/riscv/insns/vwaddu_wv.h @@ -0,0 +1,6 @@ +// vwaddu.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, +, uint); +}) diff --git a/riscv/insns/vwaddu_wx.h b/riscv/insns/vwaddu_wx.h new file mode 100644 index 0000000000..0073ac35c5 --- /dev/null +++ b/riscv/insns/vwaddu_wx.h @@ -0,0 +1,6 @@ +// vwaddu.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, +, uint); +}) diff --git a/riscv/insns/vwmacc_vv.h b/riscv/insns/vwmacc_vv.h new file mode 100644 index 0000000000..7208c6d696 --- /dev/null +++ b/riscv/insns/vwmacc_vv.h @@ -0,0 +1,6 @@ +// vwmacc.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, int); +}) diff --git a/riscv/insns/vwmacc_vx.h b/riscv/insns/vwmacc_vx.h new file mode 100644 index 0000000000..5ae597a267 --- /dev/null +++ b/riscv/insns/vwmacc_vx.h @@ -0,0 +1,6 @@ +// vwmacc.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, int); +}) diff 
--git a/riscv/insns/vwmaccsu_vv.h b/riscv/insns/vwmaccsu_vv.h new file mode 100644 index 0000000000..3aa43ef44d --- /dev/null +++ b/riscv/insns/vwmaccsu_vv.h @@ -0,0 +1,6 @@ +// vwmaccsu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, vs1, vd_w, *, +, int, uint, int); +}) diff --git a/riscv/insns/vwmaccsu_vx.h b/riscv/insns/vwmaccsu_vx.h new file mode 100644 index 0000000000..e00a21ddc1 --- /dev/null +++ b/riscv/insns/vwmaccsu_vx.h @@ -0,0 +1,6 @@ +// vwmaccsu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, uint, int); +}) diff --git a/riscv/insns/vwmaccu_vv.h b/riscv/insns/vwmaccu_vv.h new file mode 100644 index 0000000000..2cbdaa312b --- /dev/null +++ b/riscv/insns/vwmaccu_vv.h @@ -0,0 +1,6 @@ +// vwmaccu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, uint); +}) diff --git a/riscv/insns/vwmaccu_vx.h b/riscv/insns/vwmaccu_vx.h new file mode 100644 index 0000000000..533297f3fa --- /dev/null +++ b/riscv/insns/vwmaccu_vx.h @@ -0,0 +1,6 @@ +// vwmaccu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, uint); +}) diff --git a/riscv/insns/vwmaccus_vx.h b/riscv/insns/vwmaccus_vx.h new file mode 100644 index 0000000000..5310f0e9be --- /dev/null +++ b/riscv/insns/vwmaccus_vx.h @@ -0,0 +1,6 @@ +// vwmaccus.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, int, uint); +}) diff --git a/riscv/insns/vwmul_vv.h b/riscv/insns/vwmul_vv.h new file mode 100644 index 0000000000..2197edbfbf --- /dev/null +++ b/riscv/insns/vwmul_vv.h @@ -0,0 +1,6 @@ +// vwmul.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, int); +}) diff --git a/riscv/insns/vwmul_vx.h b/riscv/insns/vwmul_vx.h new file mode 100644 index 0000000000..bc1422d400 --- /dev/null +++ 
b/riscv/insns/vwmul_vx.h @@ -0,0 +1,6 @@ +// vwmul.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, int); +}) diff --git a/riscv/insns/vwmulsu_vv.h b/riscv/insns/vwmulsu_vv.h new file mode 100644 index 0000000000..5f84721f6f --- /dev/null +++ b/riscv/insns/vwmulsu_vv.h @@ -0,0 +1,16 @@ +// vwmulsu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + switch(P.VU.vsew) { + case e8: + P.VU.elt(rd_num, i, true) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)vs1; + break; + case e16: + P.VU.elt(rd_num, i, true) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)vs1; + break; + default: + P.VU.elt(rd_num, i, true) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)vs1; + break; + } +}) diff --git a/riscv/insns/vwmulsu_vx.h b/riscv/insns/vwmulsu_vx.h new file mode 100644 index 0000000000..68d6d276ea --- /dev/null +++ b/riscv/insns/vwmulsu_vx.h @@ -0,0 +1,16 @@ +// vwmulsu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + switch(P.VU.vsew) { + case e8: + P.VU.elt(rd_num, i, true) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)rs1; + break; + case e16: + P.VU.elt(rd_num, i, true) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)rs1; + break; + default: + P.VU.elt(rd_num, i, true) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)rs1; + break; + } +}) diff --git a/riscv/insns/vwmulu_vv.h b/riscv/insns/vwmulu_vv.h new file mode 100644 index 0000000000..8ddbb4b488 --- /dev/null +++ b/riscv/insns/vwmulu_vv.h @@ -0,0 +1,6 @@ +// vwmulu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, uint); +}) diff --git a/riscv/insns/vwmulu_vx.h b/riscv/insns/vwmulu_vx.h new file mode 100644 index 0000000000..1ce77eefdc --- /dev/null +++ b/riscv/insns/vwmulu_vx.h @@ -0,0 +1,6 @@ +// vwmulu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, uint); +}) diff --git a/riscv/insns/vwredsum_vs.h b/riscv/insns/vwredsum_vs.h new file mode 100644 index 
0000000000..c7a87db431 --- /dev/null +++ b/riscv/insns/vwredsum_vs.h @@ -0,0 +1,5 @@ +// vwredsum.vs vd, vs2, vs1 +VI_VV_LOOP_WIDE_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vwredsumu_vs.h b/riscv/insns/vwredsumu_vs.h new file mode 100644 index 0000000000..889a77d310 --- /dev/null +++ b/riscv/insns/vwredsumu_vs.h @@ -0,0 +1,5 @@ +// vwredsumu.vs vd, vs2, vs1 +VI_VV_ULOOP_WIDE_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vwsub_vv.h b/riscv/insns/vwsub_vv.h new file mode 100644 index 0000000000..99f9348985 --- /dev/null +++ b/riscv/insns/vwsub_vv.h @@ -0,0 +1,6 @@ +// vwsub.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, int); +}) diff --git a/riscv/insns/vwsub_vx.h b/riscv/insns/vwsub_vx.h new file mode 100644 index 0000000000..affdf62ce0 --- /dev/null +++ b/riscv/insns/vwsub_vx.h @@ -0,0 +1,6 @@ +// vwsub.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, int); +}) diff --git a/riscv/insns/vwsub_wv.h b/riscv/insns/vwsub_wv.h new file mode 100644 index 0000000000..10db7308e2 --- /dev/null +++ b/riscv/insns/vwsub_wv.h @@ -0,0 +1,6 @@ +// vwsub.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, -, int); +}) diff --git a/riscv/insns/vwsub_wx.h b/riscv/insns/vwsub_wx.h new file mode 100644 index 0000000000..f72341ba80 --- /dev/null +++ b/riscv/insns/vwsub_wx.h @@ -0,0 +1,6 @@ +// vwsub.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, -, int); +}) diff --git a/riscv/insns/vwsubu_vv.h b/riscv/insns/vwsubu_vv.h new file mode 100644 index 0000000000..cf68adb9fa --- /dev/null +++ b/riscv/insns/vwsubu_vv.h @@ -0,0 +1,6 @@ +// vwsubu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, uint); +}) diff --git a/riscv/insns/vwsubu_vx.h b/riscv/insns/vwsubu_vx.h new file mode 100644 index 0000000000..3e972dd211 --- /dev/null +++ 
b/riscv/insns/vwsubu_vx.h @@ -0,0 +1,6 @@ +// vwsubu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, uint); +}) diff --git a/riscv/insns/vwsubu_wv.h b/riscv/insns/vwsubu_wv.h new file mode 100644 index 0000000000..3687c3d237 --- /dev/null +++ b/riscv/insns/vwsubu_wv.h @@ -0,0 +1,6 @@ +// vwsubu.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, -, uint); +}) diff --git a/riscv/insns/vwsubu_wx.h b/riscv/insns/vwsubu_wx.h new file mode 100644 index 0000000000..c7f20edd79 --- /dev/null +++ b/riscv/insns/vwsubu_wx.h @@ -0,0 +1,6 @@ +// vwsubu.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, -, uint); +}) diff --git a/riscv/insns/vxor_vi.h b/riscv/insns/vxor_vi.h new file mode 100644 index 0000000000..b2dcf946dc --- /dev/null +++ b/riscv/insns/vxor_vi.h @@ -0,0 +1,5 @@ +// vxor +VI_VI_LOOP +({ + vd = simm5 ^ vs2; +}) diff --git a/riscv/insns/vxor_vv.h b/riscv/insns/vxor_vv.h new file mode 100644 index 0000000000..c37b6ab729 --- /dev/null +++ b/riscv/insns/vxor_vv.h @@ -0,0 +1,5 @@ +// vxor +VI_VV_LOOP +({ + vd = vs1 ^ vs2; +}) diff --git a/riscv/insns/vxor_vx.h b/riscv/insns/vxor_vx.h new file mode 100644 index 0000000000..8021e0e851 --- /dev/null +++ b/riscv/insns/vxor_vx.h @@ -0,0 +1,5 @@ +// vxor +VI_VX_LOOP +({ + vd = rs1 ^ vs2; +}) diff --git a/riscv/insns/vzext_vf2.h b/riscv/insns/vzext_vf2.h new file mode 100644 index 0000000000..100f2e359a --- /dev/null +++ b/riscv/insns/vzext_vf2.h @@ -0,0 +1 @@ +VI_VV_EXT(2, uint); diff --git a/riscv/insns/vzext_vf4.h b/riscv/insns/vzext_vf4.h new file mode 100644 index 0000000000..6ff920e0bc --- /dev/null +++ b/riscv/insns/vzext_vf4.h @@ -0,0 +1 @@ +VI_VV_EXT(4, uint); diff --git a/riscv/insns/vzext_vf8.h b/riscv/insns/vzext_vf8.h new file mode 100644 index 0000000000..b1762fbf67 --- /dev/null +++ b/riscv/insns/vzext_vf8.h @@ -0,0 +1 @@ +VI_VV_EXT(8, uint); diff --git a/riscv/insns/wfi.h b/riscv/insns/wfi.h 
index 6504b78c60..59ed35bb6f 100644 --- a/riscv/insns/wfi.h +++ b/riscv/insns/wfi.h @@ -1,2 +1,11 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TW) ? PRV_M : PRV_S); +if (STATE.v && STATE.prv == PRV_U) { + require_novirt(); +} else if (get_field(STATE.mstatus, MSTATUS_TW)) { + require_privilege(PRV_M); +} else if (STATE.v) { // VS-mode + if (get_field(STATE.hstatus, HSTATUS_VTW)) + require_novirt(); +} else { + require_privilege(PRV_S); +} wfi(); diff --git a/riscv/interactive.cc b/riscv/interactive.cc index c96c71ace7..00e505d896 100644 --- a/riscv/interactive.cc +++ b/riscv/interactive.cc @@ -66,8 +66,10 @@ void sim_t::interactive() funcs["run"] = &sim_t::interactive_run_noisy; funcs["r"] = funcs["run"]; funcs["rs"] = &sim_t::interactive_run_silent; + funcs["vreg"] = &sim_t::interactive_vreg; funcs["reg"] = &sim_t::interactive_reg; funcs["freg"] = &sim_t::interactive_freg; + funcs["fregh"] = &sim_t::interactive_fregh; funcs["fregs"] = &sim_t::interactive_fregs; funcs["fregd"] = &sim_t::interactive_fregd; funcs["pc"] = &sim_t::interactive_pc; @@ -107,7 +109,7 @@ void sim_t::interactive() else fprintf(stderr, "Unknown command %s\n", cmd.c_str()); } - catch(trap_t t) {} + catch(trap_t& t) {} } ctrlc_pressed = false; } @@ -117,8 +119,10 @@ void sim_t::interactive_help(const std::string& cmd, const std::vector [reg] # Display [reg] (all if omitted) in \n" + "fregh # Display half precision in \n" "fregs # Display single precision in \n" "fregd # Display double precision in \n" + "vreg [reg] # Display vector [reg] (all if omitted) in \n" "pc # Show current PC in \n" "mem # Show contents of physical memory\n" "str # Show NUL-terminated C string\n" @@ -218,6 +222,54 @@ freg_t sim_t::get_freg(const std::vector& args) return p->get_state()->FPR[r]; } +void sim_t::interactive_vreg(const std::string& cmd, const std::vector& args) +{ + int rstart = 0; + int rend = NVPR; + if (args.size() >= 2) { + rstart = strtol(args[1].c_str(), NULL, 0); + if (!(rstart >= 0 && 
rstart < NVPR)) { + rstart = 0; + } else { + rend = rstart + 1; + } + } + + // Show all the regs! + processor_t *p = get_core(args[0]); + const int vlen = (int)(p->VU.get_vlen()) >> 3; + const int elen = (int)(p->VU.get_elen()) >> 3; + const int num_elem = vlen/elen; + fprintf(stderr, "VLEN=%d bits; ELEN=%d bits\n", vlen << 3, elen << 3); + + for (int r = rstart; r < rend; ++r) { + fprintf(stderr, "%-4s: ", vr_name[r]); + for (int e = num_elem-1; e >= 0; --e){ + uint64_t val; + switch(elen){ + case 8: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%016" PRIx64 " ", e, val); + break; + case 4: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx32 " ", e, (uint32_t)val); + break; + case 2: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx16 " ", e, (uint16_t)val); + break; + case 1: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx8 " ", e, (uint8_t)val); + break; + } + } + fprintf(stderr, "\n"); + } +} + + void sim_t::interactive_reg(const std::string& cmd, const std::vector& args) { if (args.size() == 1) { @@ -246,6 +298,13 @@ void sim_t::interactive_freg(const std::string& cmd, const std::vector& args) +{ + fpr f; + f.r = freg(f16_to_f32(f16(get_freg(args)))); + fprintf(stderr, "%g\n", isBoxedF32(f.r) ? 
(double)f.s : NAN); +} + void sim_t::interactive_fregs(const std::string& cmd, const std::vector& args) { fpr f; @@ -361,7 +420,7 @@ void sim_t::interactive_until(const std::string& cmd, const std::vector 0) + rti_remaining--; + dm->run_test_idle(); + break; case TEST_LOGIC_RESET: ir = IR_IDCODE; break; @@ -136,7 +145,12 @@ void jtag_dtm_t::capture_dr() dr_length = 32; break; case IR_DBUS: - dr = dmi; + if (rti_remaining > 0 || busy_stuck) { + dr = DMI_OP_STATUS_BUSY; + busy_stuck = true; + } else { + dr = dmi; + } dr_length = abits + 34; break; default: @@ -151,34 +165,37 @@ void jtag_dtm_t::update_dr() { D(fprintf(stderr, "Update DR; IR=0x%x, DR=0x%lx (%d bits)\n", ir, dr, dr_length)); - switch (ir) { - case IR_DBUS: - { - unsigned op = get_field(dr, DMI_OP); - uint32_t data = get_field(dr, DMI_DATA); - unsigned address = get_field(dr, DMI_ADDRESS); - - dmi = dr; - - bool success = true; - if (op == DMI_OP_READ) { - uint32_t value; - if (dm->dmi_read(address, &value)) { - dmi = set_field(dmi, DMI_DATA, value); - } else { - success = false; - } - } else if (op == DMI_OP_WRITE) { - success = dm->dmi_write(address, data); - } - - if (success) { - dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_SUCCESS); - } else { - dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_FAILED); - } - D(fprintf(stderr, "dmi=0x%lx\n", dmi)); + if (ir == IR_DTMCONTROL) { + if (dr & DTMCONTROL_DMIRESET) + busy_stuck = false; + if (dr & DTMCONTROL_DMIHARDRESET) + reset(); + } else if (ir == IR_DBUS && !busy_stuck) { + unsigned op = get_field(dr, DMI_OP); + uint32_t data = get_field(dr, DMI_DATA); + unsigned address = get_field(dr, DMI_ADDRESS); + + dmi = dr; + + bool success = true; + if (op == DMI_OP_READ) { + uint32_t value; + if (dm->dmi_read(address, &value)) { + dmi = set_field(dmi, DMI_DATA, value); + } else { + success = false; } - break; + } else if (op == DMI_OP_WRITE) { + success = dm->dmi_write(address, data); + } + + if (success) { + dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_SUCCESS); + } 
else { + dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_FAILED); + } + D(fprintf(stderr, "dmi=0x%lx\n", dmi)); + + rti_remaining = required_rti_cycles; } } diff --git a/riscv/jtag_dtm.h b/riscv/jtag_dtm.h index 063e3f40b3..3482b8a06b 100644 --- a/riscv/jtag_dtm.h +++ b/riscv/jtag_dtm.h @@ -29,7 +29,7 @@ class jtag_dtm_t static const unsigned idcode = 0xdeadbeef; public: - jtag_dtm_t(debug_module_t *dm); + jtag_dtm_t(debug_module_t *dm, unsigned required_rti_cycles); void reset(); void set_pins(bool tck, bool tms, bool tdi); @@ -40,6 +40,9 @@ class jtag_dtm_t private: debug_module_t *dm; + // The number of Run-Test/Idle cycles required before a DMI access is + // complete. + unsigned required_rti_cycles; bool _tck, _tms, _tdi, _tdo; uint32_t ir; const unsigned ir_length = 5; @@ -51,6 +54,10 @@ class jtag_dtm_t const unsigned abits = 6; uint32_t dtmcontrol; uint64_t dmi; + // Number of Run-Test/Idle cycles needed before we call this access + // complete. + unsigned rti_remaining; + bool busy_stuck; jtag_state_t _state; diff --git a/riscv/log_file.h b/riscv/log_file.h new file mode 100644 index 0000000000..d039859dc6 --- /dev/null +++ b/riscv/log_file.h @@ -0,0 +1,37 @@ +// See LICENSE for license details. +#ifndef _RISCV_LOGFILE_H +#define _RISCV_LOGFILE_H + +#include +#include +#include +#include + +// Header-only class wrapping a log file. When constructed with an +// actual path, it opens the named file for writing. When constructed +// with the null path, it wraps stderr. +class log_file_t +{ +public: + log_file_t(const char *path) + : wrapped_file (nullptr, &fclose) + { + if (!path) + return; + + wrapped_file.reset(fopen(path, "w")); + if (! wrapped_file) { + std::ostringstream oss; + oss << "Failed to open log file at `" << path << "': " + << strerror (errno); + throw std::runtime_error(oss.str()); + } + } + + FILE *get() { return wrapped_file ? 
wrapped_file.get() : stderr; } + +private: + std::unique_ptr wrapped_file; +}; + +#endif diff --git a/riscv/mmio_plugin.h b/riscv/mmio_plugin.h new file mode 100644 index 0000000000..f14470bf38 --- /dev/null +++ b/riscv/mmio_plugin.h @@ -0,0 +1,91 @@ +#ifndef _RISCV_MMIO_PLUGIN_H +#define _RISCV_MMIO_PLUGIN_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +typedef uint64_t reg_t; + +typedef struct { + // Allocate user data for an instance of the plugin. The parameter is a simple + // c-string containing arguments used to construct the plugin. It returns a + // void* to the allocated data. + void* (*alloc)(const char*); + + // Load a memory address of the MMIO plugin. The parameters are the user_data + // (void*), memory offset (reg_t), number of bytes to load (size_t), and the + // buffer into which the loaded data should be written (uint8_t*). Return true + // if the load is successful and false otherwise. + bool (*load)(void*, reg_t, size_t, uint8_t*); + + // Store some bytes to a memory address of the MMIO plugin. The parameters are + // the user_data (void*), memory offset (reg_t), number of bytes to store + // (size_t), and the buffer containing the data to be stored (const uint8_t*). + // Return true if the store is successful and false otherwise. + bool (*store)(void*, reg_t, size_t, const uint8_t*); + + // Deallocate the data allocated during the call to alloc. The parameter is a + // pointer to the user data allocated during the call to alloc. + void (*dealloc)(void*); +} mmio_plugin_t; + +// Register an mmio plugin with the application. This should be called by +// plugins as part of their loading process. +extern void register_mmio_plugin(const char* name_cstr, + const mmio_plugin_t* mmio_plugin); + +#ifdef __cplusplus +} + +#include + +// Wrapper around the C plugin API that makes registering a C++ class with +// correctly formed constructor, load, and store functions easier. 
The template +// type should be the type that implements the MMIO plugin interface. Simply +// make a global mmio_plugin_registration_t and your plugin should register +// itself with the application when it is loaded because the +// mmio_plugin_registration_t constructor will be called. +template +struct mmio_plugin_registration_t +{ + static void* alloc(const char* args) + { + return reinterpret_cast(new T(std::string(args))); + } + + static bool load(void* self, reg_t addr, size_t len, uint8_t* bytes) + { + return reinterpret_cast(self)->load(addr, len, bytes); + } + + static bool store(void* self, reg_t addr, size_t len, const uint8_t* bytes) + { + return reinterpret_cast(self)->store(addr, len, bytes); + } + + static void dealloc(void* self) + { + delete reinterpret_cast(self); + } + + mmio_plugin_registration_t(const std::string& name) + { + mmio_plugin_t plugin = { + mmio_plugin_registration_t::alloc, + mmio_plugin_registration_t::load, + mmio_plugin_registration_t::store, + mmio_plugin_registration_t::dealloc, + }; + + register_mmio_plugin(name.c_str(), &plugin); + } +}; +#endif // __cplusplus + +#endif diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 3e1fc25552..e8dca6a85d 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -37,39 +37,51 @@ void mmu_t::flush_tlb() static void throw_access_exception(reg_t addr, access_type type) { switch (type) { - case FETCH: throw trap_instruction_access_fault(addr); - case LOAD: throw trap_load_access_fault(addr); - case STORE: throw trap_store_access_fault(addr); + case FETCH: throw trap_instruction_access_fault(addr, 0, 0); + case LOAD: throw trap_load_access_fault(addr, 0, 0); + case STORE: throw trap_store_access_fault(addr, 0, 0); default: abort(); } } -reg_t mmu_t::translate(reg_t addr, reg_t len, access_type type) +reg_t mmu_t::translate(reg_t addr, reg_t len, access_type type, uint32_t xlate_flags) { if (!proc) return addr; + bool mxr = get_field(proc->state.mstatus, MSTATUS_MXR); + bool virt = proc->state.v; reg_t 
mode = proc->state.prv; if (type != FETCH) { - if (!proc->state.dcsr.cause && get_field(proc->state.mstatus, MSTATUS_MPRV)) + if (!proc->state.debug_mode && get_field(proc->state.mstatus, MSTATUS_MPRV)) { mode = get_field(proc->state.mstatus, MSTATUS_MPP); + if (get_field(proc->state.mstatus, MSTATUS_MPV)) + virt = true; + } + if (!proc->state.debug_mode && (xlate_flags & RISCV_XLATE_VIRT)) { + virt = true; + mode = get_field(proc->state.hstatus, HSTATUS_SPVP); + if (type == LOAD && (xlate_flags & RISCV_XLATE_VIRT_MXR)) { + mxr = true; + } + } } - reg_t paddr = walk(addr, type, mode) | (addr & (PGSIZE-1)); - if (!pmp_ok(paddr, type, mode) || !pmp_homogeneous(paddr, len)) + reg_t paddr = walk(addr, type, mode, virt, mxr) | (addr & (PGSIZE-1)); + if (!pmp_ok(paddr, len, type, mode)) throw_access_exception(addr, type); return paddr; } tlb_entry_t mmu_t::fetch_slow_path(reg_t vaddr) { - reg_t paddr = translate(vaddr, sizeof(fetch_temp), FETCH); + reg_t paddr = translate(vaddr, sizeof(fetch_temp), FETCH, 0); if (auto host_addr = sim->addr_to_mem(paddr)) { return refill_tlb(vaddr, paddr, host_addr, FETCH); } else { - if (!sim->mmio_load(paddr, sizeof fetch_temp, (uint8_t*)&fetch_temp)) - throw trap_instruction_access_fault(vaddr); + if (!mmio_load(paddr, sizeof fetch_temp, (uint8_t*)&fetch_temp)) + throw trap_instruction_access_fault(vaddr, 0, 0); tlb_entry_t entry = {(char*)&fetch_temp - vaddr, paddr - vaddr}; return entry; } @@ -101,9 +113,34 @@ reg_t reg_from_bytes(size_t len, const uint8_t* bytes) abort(); } -void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) +bool mmu_t::mmio_ok(reg_t addr, access_type type) +{ + // Disallow access to debug region when not in debug mode + if (addr >= DEBUG_START && addr <= DEBUG_END && proc && !proc->state.debug_mode) + return false; + + return true; +} + +bool mmu_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) +{ + if (!mmio_ok(addr, LOAD)) + return false; + + return sim->mmio_load(addr, len, bytes); +} + +bool 
mmu_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) +{ + if (!mmio_ok(addr, STORE)) + return false; + + return sim->mmio_store(addr, len, bytes); +} + +void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes, uint32_t xlate_flags) { - reg_t paddr = translate(addr, len, LOAD); + reg_t paddr = translate(addr, len, LOAD, xlate_flags); if (auto host_addr = sim->addr_to_mem(paddr)) { memcpy(bytes, host_addr, len); @@ -111,8 +148,8 @@ void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) tracer.trace(paddr, len, LOAD); else refill_tlb(addr, paddr, host_addr, LOAD); - } else if (!sim->mmio_load(paddr, len, bytes)) { - throw trap_load_access_fault(addr); + } else if (!mmio_load(paddr, len, bytes)) { + throw trap_load_access_fault(addr, 0, 0); } if (!matched_trigger) { @@ -123,9 +160,9 @@ void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) } } -void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes) +void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes, uint32_t xlate_flags) { - reg_t paddr = translate(addr, len, STORE); + reg_t paddr = translate(addr, len, STORE, xlate_flags); if (!matched_trigger) { reg_t data = reg_from_bytes(len, bytes); @@ -140,8 +177,8 @@ void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes) tracer.trace(paddr, len, STORE); else refill_tlb(addr, paddr, host_addr, STORE); - } else if (!sim->mmio_store(paddr, len, bytes)) { - throw trap_store_access_fault(addr); + } else if (!mmio_store(paddr, len, bytes)) { + throw trap_store_access_fault(addr, 0, 0); } } @@ -173,26 +210,40 @@ tlb_entry_t mmu_t::refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_ return entry; } -reg_t mmu_t::pmp_ok(reg_t addr, access_type type, reg_t mode) +reg_t mmu_t::pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode) { - if (!proc) + if (!proc || proc->n_pmp == 0) return true; reg_t base = 0; - for (size_t i = 0; i < proc->state.n_pmp; i++) { - reg_t tor = 
proc->state.pmpaddr[i] << PMP_SHIFT; + for (size_t i = 0; i < proc->n_pmp; i++) { + reg_t tor = (proc->state.pmpaddr[i] & proc->pmp_tor_mask()) << PMP_SHIFT; uint8_t cfg = proc->state.pmpcfg[i]; if (cfg & PMP_A) { bool is_tor = (cfg & PMP_A) == PMP_TOR; bool is_na4 = (cfg & PMP_A) == PMP_NA4; - reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4); + reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4) | ~proc->pmp_tor_mask(); mask = ~(mask & ~(mask + 1)) << PMP_SHIFT; - bool napot_match = ((addr ^ tor) & mask) == 0; - bool tor_match = base <= addr && addr < tor; - if (is_tor ? tor_match : napot_match) { + // Check each 4-byte sector of the access + bool any_match = false; + bool all_match = true; + for (reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) { + reg_t cur_addr = addr + offset; + bool napot_match = ((cur_addr ^ tor) & mask) == 0; + bool tor_match = base <= cur_addr && cur_addr < tor; + bool match = is_tor ? tor_match : napot_match; + any_match |= match; + all_match &= match; + } + + if (any_match) { + // If the PMP matches only a strict subset of the access, fail it + if (!all_match) + return false; + return (mode == PRV_M && !(cfg & PMP_L)) || (type == LOAD && (cfg & PMP_R)) || @@ -216,8 +267,8 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) return true; reg_t base = 0; - for (size_t i = 0; i < proc->state.n_pmp; i++) { - reg_t tor = proc->state.pmpaddr[i] << PMP_SHIFT; + for (size_t i = 0; i < proc->n_pmp; i++) { + reg_t tor = (proc->state.pmpaddr[i] & proc->pmp_tor_mask()) << PMP_SHIFT; uint8_t cfg = proc->state.pmpcfg[i]; if (cfg & PMP_A) { @@ -231,7 +282,7 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) bool tor_homogeneous = ends_before_lower || begins_after_upper || (begins_after_lower && ends_before_upper); - reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4); + reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4) | ~proc->pmp_tor_mask(); mask = ~(mask & ~(mask + 1)) << PMP_SHIFT; bool mask_homogeneous = ~(mask << 
1) & len; bool napot_homogeneous = mask_homogeneous || ((addr ^ tor) / len) != 0; @@ -246,15 +297,82 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) return true; } -reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) +reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, bool virt, bool mxr) +{ + if (!virt) + return gpa; + + vm_info vm = decode_vm_info(proc->max_xlen, true, 0, proc->get_state()->hgatp); + if (vm.levels == 0) + return gpa; + + reg_t base = vm.ptbase; + for (int i = vm.levels - 1; i >= 0; i--) { + int ptshift = i * vm.idxbits; + int idxbits = (i == (vm.levels - 1)) ? vm.idxbits + vm.widenbits : vm.idxbits; + reg_t idx = (gpa >> (PGSHIFT + ptshift)) & ((reg_t(1) << idxbits) - 1); + + // check that physical address of PTE is legal + auto pte_paddr = base + idx * vm.ptesize; + auto ppte = sim->addr_to_mem(pte_paddr); + if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S)) { + throw_access_exception(gva, type); + } + + reg_t pte = vm.ptesize == 4 ? from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte); + reg_t ppn = pte >> PTE_PPN_SHIFT; + + if (PTE_TABLE(pte)) { // next level of page table + base = ppn << PGSHIFT; + } else if (!(pte & PTE_V) || (!(pte & PTE_R) && (pte & PTE_W))) { + break; + } else if (!(pte & PTE_U)) { + break; + } else if (type == FETCH ? !(pte & PTE_X) : + type == LOAD ? !(pte & PTE_R) && !(mxr && (pte & PTE_X)) : + !((pte & PTE_R) && (pte & PTE_W))) { + break; + } else if ((ppn & ((reg_t(1) << ptshift) - 1)) != 0) { + break; + } else { + reg_t ad = PTE_A | ((type == STORE) * PTE_D); +#ifdef RISCV_ENABLE_DIRTY + // set accessed and possibly dirty bits. + if ((pte & ad) != ad) { + if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S)) + throw_access_exception(gva, type); + *(uint32_t*)ppte |= to_le((uint32_t)ad); + } +#else + // take exception if access or possibly dirty bit is not set. 
+ if ((pte & ad) != ad) + break; +#endif + reg_t vpn = gpa >> PGSHIFT; + reg_t page_mask = (reg_t(1) << PGSHIFT) - 1; + reg_t page_base = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; + return page_base | (gpa & page_mask); + } + } + + switch (type) { + case FETCH: throw trap_instruction_guest_page_fault(gva, gpa >> 2, 0); + case LOAD: throw trap_load_guest_page_fault(gva, gpa >> 2, 0); + case STORE: throw trap_store_guest_page_fault(gva, gpa >> 2, 0); + default: abort(); + } +} + +reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode, bool virt, bool mxr) { - vm_info vm = decode_vm_info(proc->max_xlen, mode, proc->get_state()->satp); + reg_t page_mask = (reg_t(1) << PGSHIFT) - 1; + reg_t satp = (virt) ? proc->get_state()->vsatp : proc->get_state()->satp; + vm_info vm = decode_vm_info(proc->max_xlen, false, mode, satp); if (vm.levels == 0) - return addr & ((reg_t(2) << (proc->xlen-1))-1); // zero-extend from xlen + return s2xlate(addr, addr & ((reg_t(2) << (proc->xlen-1))-1), type, virt, mxr) & ~page_mask; // zero-extend from xlen bool s_mode = mode == PRV_S; bool sum = get_field(proc->state.mstatus, MSTATUS_SUM); - bool mxr = get_field(proc->state.mstatus, MSTATUS_MXR); // verify bits xlen-1:va_bits-1 are all equal int va_bits = PGSHIFT + vm.levels * vm.idxbits; @@ -269,12 +387,12 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) reg_t idx = (addr >> (PGSHIFT + ptshift)) & ((1 << vm.idxbits) - 1); // check that physical address of PTE is legal - auto pte_paddr = base + idx * vm.ptesize; + auto pte_paddr = s2xlate(addr, base + idx * vm.ptesize, LOAD, virt, false); auto ppte = sim->addr_to_mem(pte_paddr); - if (!ppte || !pmp_ok(pte_paddr, LOAD, PRV_S)) + if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S)) throw_access_exception(addr, type); - reg_t pte = vm.ptesize == 4 ? *(uint32_t*)ppte : *(uint64_t*)ppte; + reg_t pte = vm.ptesize == 4 ? 
from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte); reg_t ppn = pte >> PTE_PPN_SHIFT; if (PTE_TABLE(pte)) { // next level of page table @@ -294,9 +412,9 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) #ifdef RISCV_ENABLE_DIRTY // set accessed and possibly dirty bits. if ((pte & ad) != ad) { - if (!pmp_ok(pte_paddr, STORE, PRV_S)) + if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S)) throw_access_exception(addr, type); - *(uint32_t*)ppte |= ad; + *(uint32_t*)ppte |= to_le((uint32_t)ad); } #else // take exception if access or possibly dirty bit is not set. @@ -305,15 +423,16 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) #endif // for superpage mappings, make a fake leaf PTE for the TLB's benefit. reg_t vpn = addr >> PGSHIFT; - reg_t value = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; - return value; + reg_t page_base = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; + reg_t phys = page_base | (addr & page_mask); + return s2xlate(addr, phys, type, virt, mxr) & ~page_mask; } } switch (type) { - case FETCH: throw trap_instruction_page_fault(addr); - case LOAD: throw trap_load_page_fault(addr); - case STORE: throw trap_store_page_fault(addr); + case FETCH: throw trap_instruction_page_fault(addr, 0, 0); + case LOAD: throw trap_load_page_fault(addr, 0, 0); + case STORE: throw trap_store_page_fault(addr, 0, 0); default: abort(); } } diff --git a/riscv/mmu.h b/riscv/mmu.h index 7617367436..990f137287 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -10,6 +10,7 @@ #include "simif.h" #include "processor.h" #include "memtracer.h" +#include "byteorder.h" #include #include @@ -17,6 +18,7 @@ #define PGSHIFT 12 const reg_t PGSIZE = 1 << PGSHIFT; const reg_t PGMASK = ~(PGSIZE-1); +#define MAX_PADDR_BITS 56 // imposed by Sv39 / Sv48 struct insn_fetch_t { @@ -64,7 +66,7 @@ class mmu_t res += (reg_t)load_uint8(addr + i) << (i * 8); return res; #else - throw trap_load_address_misaligned(addr); + throw trap_load_address_misaligned(addr, 
0, 0); #endif } @@ -74,80 +76,131 @@ class mmu_t for (size_t i = 0; i < size; i++) store_uint8(addr + i, data >> (i * 8)); #else - throw trap_store_address_misaligned(addr); + throw trap_store_address_misaligned(addr, 0, 0); #endif } +#ifndef RISCV_ENABLE_COMMITLOG +# define READ_MEM(addr, size) ({}) +#else +# define READ_MEM(addr, size) \ + proc->state.log_mem_read.push_back(std::make_tuple(addr, 0, size)); +#endif + +#define RISCV_XLATE_VIRT (1U << 0) +#define RISCV_XLATE_VIRT_MXR (1U << 1) + // template for functions that load an aligned value from memory - #define load_func(type) \ - inline type##_t load_##type(reg_t addr) { \ + #define load_func(type, prefix, xlate_flags) \ + inline type##_t prefix##_##type(reg_t addr) { \ + if (xlate_flags) \ + flush_tlb(); \ if (unlikely(addr & (sizeof(type##_t)-1))) \ return misaligned_load(addr, sizeof(type##_t)); \ reg_t vpn = addr >> PGSHIFT; \ - if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) \ - return *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \ + size_t size = sizeof(type##_t); \ + if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) { \ + if (proc) READ_MEM(addr, size); \ + return from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \ + } \ if (unlikely(tlb_load_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \ - type##_t data = *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \ + type##_t data = from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \ if (!matched_trigger) { \ matched_trigger = trigger_exception(OPERATION_LOAD, addr, data); \ if (matched_trigger) \ throw *matched_trigger; \ } \ + if (proc) READ_MEM(addr, size); \ return data; \ } \ type##_t res; \ - load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res); \ - return res; \ + load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res, (xlate_flags)); \ + if (proc) READ_MEM(addr, size); \ + if (xlate_flags) \ + flush_tlb(); \ + return from_le(res); \ } // load value from memory 
at aligned address; zero extend to register width - load_func(uint8) - load_func(uint16) - load_func(uint32) - load_func(uint64) + load_func(uint8, load, 0) + load_func(uint16, load, 0) + load_func(uint32, load, 0) + load_func(uint64, load, 0) + + // load value from guest memory at aligned address; zero extend to register width + load_func(uint8, guest_load, RISCV_XLATE_VIRT) + load_func(uint16, guest_load, RISCV_XLATE_VIRT) + load_func(uint32, guest_load, RISCV_XLATE_VIRT) + load_func(uint64, guest_load, RISCV_XLATE_VIRT) + load_func(uint16, guest_load_x, RISCV_XLATE_VIRT|RISCV_XLATE_VIRT_MXR) + load_func(uint32, guest_load_x, RISCV_XLATE_VIRT|RISCV_XLATE_VIRT_MXR) // load value from memory at aligned address; sign extend to register width - load_func(int8) - load_func(int16) - load_func(int32) - load_func(int64) + load_func(int8, load, 0) + load_func(int16, load, 0) + load_func(int32, load, 0) + load_func(int64, load, 0) + + // load value from guest memory at aligned address; sign extend to register width + load_func(int8, guest_load, RISCV_XLATE_VIRT) + load_func(int16, guest_load, RISCV_XLATE_VIRT) + load_func(int32, guest_load, RISCV_XLATE_VIRT) + load_func(int64, guest_load, RISCV_XLATE_VIRT) + +#ifndef RISCV_ENABLE_COMMITLOG +# define WRITE_MEM(addr, value, size) ({}) +#else +# define WRITE_MEM(addr, val, size) \ + proc->state.log_mem_write.push_back(std::make_tuple(addr, val, size)); +#endif // template for functions that store an aligned value to memory - #define store_func(type) \ - void store_##type(reg_t addr, type##_t val) { \ + #define store_func(type, prefix, xlate_flags) \ + void prefix##_##type(reg_t addr, type##_t val) { \ + if (xlate_flags) \ + flush_tlb(); \ if (unlikely(addr & (sizeof(type##_t)-1))) \ return misaligned_store(addr, val, sizeof(type##_t)); \ reg_t vpn = addr >> PGSHIFT; \ - if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) \ - *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \ + size_t size = 
sizeof(type##_t); \ + if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) { \ + if (proc) WRITE_MEM(addr, val, size); \ + *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \ + } \ else if (unlikely(tlb_store_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \ if (!matched_trigger) { \ matched_trigger = trigger_exception(OPERATION_STORE, addr, val); \ if (matched_trigger) \ throw *matched_trigger; \ } \ - *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \ + if (proc) WRITE_MEM(addr, val, size); \ + *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \ } \ - else \ - store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&val); \ - } + else { \ + type##_t le_val = to_le(val); \ + store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&le_val, (xlate_flags)); \ + if (proc) WRITE_MEM(addr, val, size); \ + } \ + if (xlate_flags) \ + flush_tlb(); \ + } // template for functions that perform an atomic memory operation #define amo_func(type) \ template \ type##_t amo_##type(reg_t addr, op f) { \ if (addr & (sizeof(type##_t)-1)) \ - throw trap_store_address_misaligned(addr); \ + throw trap_store_address_misaligned(addr, 0, 0); \ try { \ auto lhs = load_##type(addr); \ store_##type(addr, f(lhs)); \ return lhs; \ } catch (trap_load_page_fault& t) { \ /* AMO faults should be reported as store faults */ \ - throw trap_store_page_fault(t.get_tval()); \ + throw trap_store_page_fault(t.get_tval(), t.get_tval2(), t.get_tinst()); \ } catch (trap_load_access_fault& t) { \ /* AMO faults should be reported as store faults */ \ - throw trap_store_access_fault(t.get_tval()); \ + throw trap_store_access_fault(t.get_tval(), t.get_tval2(), t.get_tinst()); \ } \ } @@ -155,7 +208,7 @@ class mmu_t { #ifndef RISCV_ENABLE_MISALIGNED if (unlikely(addr & (sizeof(float128_t)-1))) - throw trap_store_address_misaligned(addr); + throw trap_store_address_misaligned(addr, 0, 0); #endif store_uint64(addr, val.v[0]); 
store_uint64(addr + 8, val.v[1]); @@ -165,16 +218,22 @@ class mmu_t { #ifndef RISCV_ENABLE_MISALIGNED if (unlikely(addr & (sizeof(float128_t)-1))) - throw trap_load_address_misaligned(addr); + throw trap_load_address_misaligned(addr, 0, 0); #endif return (float128_t){load_uint64(addr), load_uint64(addr + 8)}; } // store value to memory at aligned address - store_func(uint8) - store_func(uint16) - store_func(uint32) - store_func(uint64) + store_func(uint8, store, 0) + store_func(uint16, store, 0) + store_func(uint32, store, 0) + store_func(uint64, store, 0) + + // store value to guest memory at aligned address + store_func(uint8, guest_store, RISCV_XLATE_VIRT) + store_func(uint16, guest_store, RISCV_XLATE_VIRT) + store_func(uint32, guest_store, RISCV_XLATE_VIRT) + store_func(uint64, guest_store, RISCV_XLATE_VIRT) // perform an atomic memory operation at an aligned address amo_func(uint32) @@ -187,20 +246,23 @@ class mmu_t inline void acquire_load_reservation(reg_t vaddr) { - reg_t paddr = translate(vaddr, 1, LOAD); + reg_t paddr = translate(vaddr, 1, LOAD, 0); if (auto host_addr = sim->addr_to_mem(paddr)) load_reservation_address = refill_tlb(vaddr, paddr, host_addr, LOAD).target_offset + vaddr; else - throw trap_load_access_fault(vaddr); // disallow LR to I/O space + throw trap_load_access_fault(vaddr, 0, 0); // disallow LR to I/O space } - inline bool check_load_reservation(reg_t vaddr) + inline bool check_load_reservation(reg_t vaddr, size_t size) { - reg_t paddr = translate(vaddr, 1, STORE); + if (vaddr & (size-1)) + throw trap_store_address_misaligned(vaddr, 0, 0); + + reg_t paddr = translate(vaddr, 1, STORE, 0); if (auto host_addr = sim->addr_to_mem(paddr)) return load_reservation_address == refill_tlb(vaddr, paddr, host_addr, STORE).target_offset + vaddr; else - throw trap_store_access_fault(vaddr); // disallow SC to I/O space + throw trap_store_access_fault(vaddr, 0, 0); // disallow SC to I/O space } static const reg_t ICACHE_ENTRIES = 1024; @@ -213,21 
+275,21 @@ class mmu_t inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry) { auto tlb_entry = translate_insn_addr(addr); - insn_bits_t insn = *(uint16_t*)(tlb_entry.host_offset + addr); + insn_bits_t insn = from_le(*(uint16_t*)(tlb_entry.host_offset + addr)); int length = insn_length(insn); if (likely(length == 4)) { - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } else if (length == 2) { insn = (int16_t)insn; } else if (length == 6) { - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 4) << 32; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } else { static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t"); - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 6) << 48; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 4) << 32; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 6)) << 48; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } insn_fetch_t fetch = {proc->decode_insn(insn), insn}; @@ -304,14 +366,20 @@ class mmu_t tlb_entry_t refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type); const char* fill_from_mmio(reg_t vaddr, reg_t paddr); + // perform a stage2 translation for a given guest address + reg_t s2xlate(reg_t gva, reg_t gpa, access_type type, bool virt, bool mxr); + // perform a page table walk for a given VA; set 
referenced/dirty bits - reg_t walk(reg_t addr, access_type type, reg_t prv); + reg_t walk(reg_t addr, access_type type, reg_t prv, bool virt, bool mxr); // handle uncommon cases: TLB misses, page faults, MMIO tlb_entry_t fetch_slow_path(reg_t addr); - void load_slow_path(reg_t addr, reg_t len, uint8_t* bytes); - void store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes); - reg_t translate(reg_t addr, reg_t len, access_type type); + void load_slow_path(reg_t addr, reg_t len, uint8_t* bytes, uint32_t xlate_flags); + void store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes, uint32_t xlate_flags); + bool mmio_load(reg_t addr, size_t len, uint8_t* bytes); + bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes); + bool mmio_ok(reg_t addr, access_type type); + reg_t translate(reg_t addr, reg_t len, access_type type, uint32_t xlate_flags); // ITLB lookup inline tlb_entry_t translate_insn_addr(reg_t addr) { @@ -326,9 +394,9 @@ class mmu_t } if (unlikely(tlb_insn_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { uint16_t* ptr = (uint16_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); - int match = proc->trigger_match(OPERATION_EXECUTE, addr, *ptr); + int match = proc->trigger_match(OPERATION_EXECUTE, addr, from_le(*ptr)); if (match >= 0) { - throw trigger_matched_t(match, OPERATION_EXECUTE, addr, *ptr); + throw trigger_matched_t(match, OPERATION_EXECUTE, addr, from_le(*ptr)); } } return result; @@ -354,7 +422,7 @@ class mmu_t } reg_t pmp_homogeneous(reg_t addr, reg_t len); - reg_t pmp_ok(reg_t addr, access_type type, reg_t mode); + reg_t pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode); bool check_triggers_fetch; bool check_triggers_load; @@ -368,27 +436,41 @@ class mmu_t struct vm_info { int levels; int idxbits; + int widenbits; int ptesize; reg_t ptbase; }; -inline vm_info decode_vm_info(int xlen, reg_t prv, reg_t satp) +inline vm_info decode_vm_info(int xlen, bool stage2, reg_t prv, reg_t satp) { if (prv == PRV_M) { - return 
{0, 0, 0, 0}; - } else if (prv <= PRV_S && xlen == 32) { + return {0, 0, 0, 0, 0}; + } else if (!stage2 && prv <= PRV_S && xlen == 32) { switch (get_field(satp, SATP32_MODE)) { - case SATP_MODE_OFF: return {0, 0, 0, 0}; - case SATP_MODE_SV32: return {2, 10, 4, (satp & SATP32_PPN) << PGSHIFT}; + case SATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case SATP_MODE_SV32: return {2, 10, 0, 4, (satp & SATP32_PPN) << PGSHIFT}; default: abort(); } - } else if (prv <= PRV_S && xlen == 64) { + } else if (!stage2 && prv <= PRV_S && xlen == 64) { switch (get_field(satp, SATP64_MODE)) { - case SATP_MODE_OFF: return {0, 0, 0, 0}; - case SATP_MODE_SV39: return {3, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV48: return {4, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV57: return {5, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV64: return {6, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case SATP_MODE_SV39: return {3, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV48: return {4, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV57: return {5, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV64: return {6, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + default: abort(); + } + } else if (stage2 && xlen == 32) { + switch (get_field(satp, HGATP32_MODE)) { + case HGATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case HGATP_MODE_SV32X4: return {2, 10, 2, 4, (satp & HGATP32_PPN) << PGSHIFT}; + default: abort(); + } + } else if (stage2 && xlen == 64) { + switch (get_field(satp, HGATP64_MODE)) { + case HGATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case HGATP_MODE_SV39X4: return {3, 9, 2, 8, (satp & HGATP64_PPN) << PGSHIFT}; + case HGATP_MODE_SV48X4: return {4, 9, 2, 8, (satp & HGATP64_PPN) << PGSHIFT}; default: abort(); } } else { diff --git a/riscv/mulhi.h b/riscv/mulhi.h deleted file mode 100644 index bb4a484a6d..0000000000 --- a/riscv/mulhi.h +++ /dev/null @@ -1,43 +0,0 @@ -// See LICENSE for 
license details. - -#ifndef _RISCV_MULHI_H -#define _RISCV_MULHI_H - -#include - -inline uint64_t mulhu(uint64_t a, uint64_t b) -{ - uint64_t t; - uint32_t y1, y2, y3; - uint64_t a0 = (uint32_t)a, a1 = a >> 32; - uint64_t b0 = (uint32_t)b, b1 = b >> 32; - - t = a1*b0 + ((a0*b0) >> 32); - y1 = t; - y2 = t >> 32; - - t = a0*b1 + y1; - y1 = t; - - t = a1*b1 + y2 + (t >> 32); - y2 = t; - y3 = t >> 32; - - return ((uint64_t)y3 << 32) | y2; -} - -inline int64_t mulh(int64_t a, int64_t b) -{ - int negate = (a < 0) != (b < 0); - uint64_t res = mulhu(a < 0 ? -a : a, b < 0 ? -b : b); - return negate ? ~res + (a * b == 0) : res; -} - -inline int64_t mulhsu(int64_t a, uint64_t b) -{ - int negate = a < 0; - uint64_t res = mulhu(a < 0 ? -a : a, b); - return negate ? ~res + (a * b == 0) : res; -} - -#endif diff --git a/riscv/opcodes.h b/riscv/opcodes.h index 34c089ebb7..065934a238 100644 --- a/riscv/opcodes.h +++ b/riscv/opcodes.h @@ -125,6 +125,11 @@ static uint32_t csrr(unsigned int rd, unsigned int csr) { return (csr << 20) | (rd << 7) | MATCH_CSRRS; } +static uint32_t csrrs(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused)); +static uint32_t csrrs(unsigned int rd, unsigned int rs1, unsigned int csr) { + return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRS; +} + static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) __attribute__ ((unused)); static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) { @@ -177,7 +182,6 @@ static uint32_t fence_i(void) return MATCH_FENCE_I; } -/* static uint32_t lui(unsigned int dest, uint32_t imm) __attribute__ ((unused)); static uint32_t lui(unsigned int dest, uint32_t imm) { @@ -186,6 +190,7 @@ static uint32_t lui(unsigned int dest, uint32_t imm) MATCH_LUI; } +/* static uint32_t csrci(unsigned int csr, uint16_t imm) __attribute__ ((unused)); static uint32_t csrci(unsigned int csr, uint16_t imm) { return (csr << 20) | diff --git a/riscv/processor.cc b/riscv/processor.cc index 
58bca7efb2..84be372a9a 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -1,11 +1,11 @@ // See LICENSE for license details. +#include "arith.h" #include "processor.h" #include "extension.h" #include "common.h" #include "config.h" #include "simif.h" -#include "ust_tracer.h" #include "mmu.h" #include "disasm.h" #include @@ -15,19 +15,27 @@ #include #include #include +#include #include #undef STATE #define STATE state -processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, - bool halt_on_reset) - : debug(false), trace(false), halt_request(false), sim(sim), ext(NULL), id(id), - halt_on_reset(halt_on_reset), last_pc(1), executions(1) +processor_t::processor_t(const char* isa, const char* priv, const char* varch, + simif_t* sim, uint32_t id, bool halt_on_reset, + FILE* log_file) + : debug(false), halt_request(HR_NONE), sim(sim), ext(NULL), id(id), xlen(0), + histogram_enabled(false), log_commits_enabled(false), + log_file(log_file), halt_on_reset(halt_on_reset), + extension_table(256, false), last_pc(1), executions(1) { + VU.p = this; + parse_isa_string(isa); - register_base_instructions(); + parse_priv_string(priv); + parse_varch_string(varch); + register_base_instructions(); mmu = new mmu_t(sim, this); disassembler = new disassembler_t(max_xlen); @@ -35,8 +43,8 @@ processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, for (auto disasm_insn : ext->get_disasms()) disassembler->add_insn(disasm_insn); - xlen = 0; - + set_pmp_granularity(1 << PMP_SHIFT); + set_pmp_num(state.max_pmp); reset(); } @@ -55,26 +63,160 @@ processor_t::~processor_t() delete disassembler; } -static void bad_isa_string(const char* isa) +static void bad_option_string(const char *option, const char *value, + const char *msg) { - fprintf(stderr, "error: bad --isa option %s\n", isa); + fprintf(stderr, "error: bad %s option '%s'. 
%s\n", option, value, msg); abort(); } -void processor_t::parse_isa_string(const char* str) +static void bad_isa_string(const char* isa, const char* msg) +{ + bad_option_string("--isa", isa, msg); +} + +static void bad_priv_string(const char* priv) +{ + fprintf(stderr, "error: bad --priv option %s\n", priv); + abort(); +} + +static void bad_varch_string(const char* varch, const char *msg) +{ + bad_option_string("--varch", varch, msg); +} + +static std::string get_string_token(std::string str, const char delimiter, size_t& pos) +{ + size_t _pos = pos; + while (pos < str.length() && str[pos] != delimiter) ++pos; + return str.substr(_pos, pos - _pos); +} + +static int get_int_token(std::string str, const char delimiter, size_t& pos) +{ + size_t _pos = pos; + while (pos < str.length() && str[pos] != delimiter) { + if (!isdigit(str[pos])) + bad_varch_string(str.c_str(), "Unsupported value"); // An integer is expected + ++pos; + } + return (pos == _pos) ? 0 : stoi(str.substr(_pos, pos - _pos)); +} + +static bool check_pow2(int val) +{ + return ((val & (val - 1))) == 0; +} + +void processor_t::parse_varch_string(const char* s) { - std::string lowercase, tmp; + std::string str, tmp; + for (const char *r = s; *r; r++) + str += std::tolower(*r); + + size_t pos = 0; + size_t len = str.length(); + int vlen = 0; + int elen = 0; + int slen = 0; + int vstart_alu = 1; + + while (pos < len) { + std::string attr = get_string_token(str, ':', pos); + + ++pos; + + if (attr == "vlen") + vlen = get_int_token(str, ',', pos); + else if (attr == "slen") + slen = get_int_token(str, ',', pos); + else if (attr == "elen") + elen = get_int_token(str, ',', pos); + else if (attr == "vstartalu") + vstart_alu = get_int_token(str, ',', pos); + else + bad_varch_string(s, "Unsupported token"); + + ++pos; + } + + // The integer should be the power of 2 + if (!check_pow2(vlen) || !check_pow2(elen) || !check_pow2(slen)){ + bad_varch_string(s, "The integer value should be the power of 2"); + } + + if (slen 
== 0) + slen = vlen; + + /* Vector spec requirements. */ + if (vlen < elen) + bad_varch_string(s, "vlen must be >= elen"); + if ((unsigned) elen < std::max(max_xlen, get_flen())) + bad_varch_string(s, "elen must be >= max(xlen, flen)"); + if (vlen != slen) + bad_varch_string(s, "vlen must be == slen for current limitation"); + + /* spike requirements. */ + if (vlen > 4096) + bad_varch_string(s, "vlen must be <= 4096"); + + VU.VLEN = vlen; + VU.ELEN = elen; + VU.vlenb = vlen / 8; + VU.vstart_alu = vstart_alu; +} + +static std::string strtolower(const char* str) +{ + std::string res; for (const char *r = str; *r; r++) - lowercase += std::tolower(*r); + res += std::tolower(*r); + return res; +} + +void processor_t::parse_priv_string(const char* str) +{ + std::string lowercase = strtolower(str); + bool user = false, supervisor = false; + + if (lowercase == "m") + ; + else if (lowercase == "mu") + user = true; + else if (lowercase == "msu") + user = supervisor = true; + else + bad_priv_string(str); + + if (user) { + max_isa |= reg_t(user) << ('u' - 'a'); + extension_table['U'] = true; + } + + if (supervisor) { + max_isa |= reg_t(supervisor) << ('s' - 'a'); + extension_table['S'] = true; + } +} + +void processor_t::parse_isa_string(const char* str) +{ + std::string lowercase = strtolower(str), tmp; + char error_msg[256]; const char* p = lowercase.c_str(); - const char* all_subsets = "imafdqc"; + const char* all_subsets = "imafdqch" +#ifdef __SIZEOF_INT128__ + "v" +#endif + ""; max_xlen = 64; - state.misa = reg_t(2) << 62; + max_isa = reg_t(2) << 62; if (strncmp(p, "rv32", 4) == 0) - max_xlen = 32, state.misa = reg_t(1) << 30, p += 4; + max_xlen = 32, max_isa = reg_t(1) << 30, p += 4; else if (strncmp(p, "rv64", 4) == 0) p += 4; else if (strncmp(p, "rv", 2) == 0) @@ -85,57 +227,196 @@ void processor_t::parse_isa_string(const char* str) } else if (*p == 'g') { // treat "G" as "IMAFD" tmp = std::string("imafd") + (p+1); p = &tmp[0]; - } else if (*p != 'i') { - 
bad_isa_string(str); } isa_string = "rv" + std::to_string(max_xlen) + p; - state.misa |= 1L << ('s' - 'a'); // advertise support for supervisor mode - state.misa |= 1L << ('u' - 'a'); // advertise support for user mode while (*p) { - state.misa |= 1L << (*p - 'a'); + if (islower(*p)) { + max_isa |= 1L << (*p - 'a'); + extension_table[toupper(*p)] = true; + + if (strchr(all_subsets, *p)) { + p++; + } else if (*p == 'x') { + const char* ext = p + 1, *end = ext; + while (islower(*end) || *end == '_') + end++; + + auto ext_str = std::string(ext, end - ext); + if (ext_str != "dummy") + register_extension(find_extension(ext_str.c_str())()); + + p = end; + } else { + sprintf(error_msg, "unsupported extension '%c'", *p); + bad_isa_string(str, error_msg); + } + } else if (*p == '_') { + const char* ext = p + 1, *end = ext; + if (*ext == 'x') { + p++; + continue; + } - if (auto next = strchr(all_subsets, *p)) { - all_subsets = next + 1; - p++; - } else if (*p == 'x') { - const char* ext = p+1, *end = ext; while (islower(*end)) end++; - register_extension(find_extension(std::string(ext, end - ext).c_str())()); + + auto ext_str = std::string(ext, end - ext); + if (ext_str == "zfh") { + extension_table[EXT_ZFH] = true; + } else { + sprintf(error_msg, "unsupported extension '%s'", ext_str.c_str()); + bad_isa_string(str, error_msg); + } + p = end; } else { - bad_isa_string(str); + sprintf(error_msg, "can't parse '%c(%d)'", *p, *p); + bad_isa_string(str, error_msg); } } - if (supports_extension('D') && !supports_extension('F')) - bad_isa_string(str); + state.misa = max_isa; - if (supports_extension('Q') && !supports_extension('D')) - bad_isa_string(str); + if (!supports_extension('I')) + bad_isa_string(str, "'I' extension is required"); + + if (supports_extension(EXT_ZFH) && !supports_extension('F')) + bad_isa_string(str, "'Zfh' extension requires 'F'"); - if (supports_extension('Q') && max_xlen < 64) - bad_isa_string(str); + if (supports_extension('D') && 
!supports_extension('F')) + bad_isa_string(str, "'D' extension requires 'F'"); - max_isa = state.misa; + if (supports_extension('Q') && !supports_extension('D')) + bad_isa_string(str, "'Q' extension requires 'D'"); } void state_t::reset(reg_t max_isa) { - memset(this, 0, sizeof(*this)); - misa = max_isa; - prv = PRV_M; pc = DEFAULT_RSTVEC; + XPR.reset(); + FPR.reset(); + + prv = PRV_M; + v = false; + misa = max_isa; mstatus = 0; - dcsr.cause = 0; + mepc = 0; + mtval = 0; + mscratch = 0; + mtvec = 0; + mcause = 0; + minstret = 0; + mie = 0; + mip = 0; + medeleg = 0; + mideleg = 0; + mcounteren = 0; + scounteren = 0; + sepc = 0; + stval = 0; + sscratch = 0; + stvec = 0; + satp = 0; + scause = 0; + mtval2 = 0; + mtinst = 0; + hstatus = 0; + hideleg = 0; + hedeleg = 0; + hcounteren = 0; + htval = 0; + htinst = 0; + hgatp = 0; + vsstatus = 0; + vstvec = 0; + vsscratch = 0; + vsepc = 0; + vscause = 0; + vstval = 0; + vsatp = 0; + + dpc = 0; + dscratch0 = 0; + dscratch1 = 0; + memset(&this->dcsr, 0, sizeof(this->dcsr)); + tselect = 0; - for (unsigned int i = 0; i < num_triggers; i++) - mcontrol[i].type = 2; + memset(this->mcontrol, 0, sizeof(this->mcontrol)); + for (auto &item : mcontrol) + item.type = 2; + + memset(this->tdata2, 0, sizeof(this->tdata2)); + debug_mode = false; + single_step = STEP_NONE; + + memset(this->pmpcfg, 0, sizeof(this->pmpcfg)); + memset(this->pmpaddr, 0, sizeof(this->pmpaddr)); + + fflags = 0; + frm = 0; + serialized = false; + +#ifdef RISCV_ENABLE_COMMITLOG + log_reg_write.clear(); + log_mem_read.clear(); + log_mem_write.clear(); + last_inst_priv = 0; + last_inst_xlen = 0; + last_inst_flen = 0; +#endif +} - pmpcfg[0] = PMP_R | PMP_W | PMP_X | PMP_NAPOT; - pmpaddr[0] = ~reg_t(0); +void processor_t::vectorUnit_t::reset(){ + free(reg_file); + VLEN = get_vlen(); + ELEN = get_elen(); + reg_file = malloc(NVPR * vlenb); + memset(reg_file, 0, NVPR * vlenb); + + vtype = 0; + set_vl(0, 0, 0, -1); // default to illegal configuration +} + +reg_t 
processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newType){ + int new_vlmul = 0; + if (vtype != newType){ + vtype = newType; + vsew = 1 << (extract64(newType, 3, 3) + 3); + new_vlmul = int8_t(extract64(newType, 0, 3) << 5) >> 5; + vflmul = new_vlmul >= 0 ? 1 << new_vlmul : 1.0 / (1 << -new_vlmul); + vlmax = (VLEN/vsew) * vflmul; + vta = extract64(newType, 6, 1); + vma = extract64(newType, 7, 1); + vediv = 1 << extract64(newType, 8, 2); + + vill = !(vflmul >= 0.125 && vflmul <= 8) + || vsew > ELEN + || vflmul < ((float)vsew / ELEN) + || vediv != 1 + || (newType >> 8) != 0; + + if (vill) { + vlmax = 0; + vtype = UINT64_MAX << (p->get_xlen() - 1); + } + } + + // set vl + if (vlmax == 0) { + vl = 0; + } else if (rd == 0 && rs1 == 0) { + vl = vl > vlmax ? vlmax : vl; + } else if (rd != 0 && rs1 == 0) { + vl = vlmax; + } else if (rs1 != 0) { + vl = reqVL > vlmax ? vlmax : reqVL; + } + + vstart = 0; + setvl_count++; + return vl; } void processor_t::set_debug(bool value) @@ -145,30 +426,42 @@ void processor_t::set_debug(bool value) ext->set_debug(value); } -void processor_t::set_trace(bool value) -{ - trace = value; - if (ext) - ext->set_trace(value); -} - void processor_t::set_histogram(bool value) { histogram_enabled = value; #ifndef RISCV_ENABLE_HISTOGRAM if (value) { fprintf(stderr, "PC Histogram support has not been properly enabled;"); - fprintf(stderr, " please re-build the riscv-isa-run project using \"configure --enable-histogram\".\n"); + fprintf(stderr, " please re-build the riscv-isa-sim project using \"configure --enable-histogram\".\n"); + abort(); } #endif } +#ifdef RISCV_ENABLE_COMMITLOG +void processor_t::enable_log_commits() +{ + log_commits_enabled = true; +} +#endif + void processor_t::reset() { state.reset(max_isa); + + state.mideleg = supports_extension('H') ? 
MIDELEG_FORCED_MASK : 0; + state.dcsr.halt = halt_on_reset; halt_on_reset = false; set_csr(CSR_MSTATUS, state.mstatus); + VU.reset(); + + if (n_pmp > 0) { + // For backwards compatibility with software that is unaware of PMP, + // initialize PMP to permit unprivileged access to all of memory. + set_csr(CSR_PMPADDR0, ~reg_t(0)); + set_csr(CSR_PMPCFG0, PMP_R | PMP_W | PMP_X | PMP_NAPOT); + } if (ext) ext->reset(); // reset the extension @@ -187,31 +480,79 @@ static int ctz(reg_t val) return res; } +void processor_t::set_pmp_num(reg_t n) +{ + // check the number of pmp is in a reasonable range + if (n > state.max_pmp) { + fprintf(stderr, "error: bad number of pmp regions: '%ld' from the dtb\n", (unsigned long)n); + abort(); + } + n_pmp = n; +} + +void processor_t::set_pmp_granularity(reg_t gran) { + // check the pmp granularity is set from dtb(!=0) and is power of 2 + if (gran < (1 << PMP_SHIFT) || (gran & (gran - 1)) != 0) { + fprintf(stderr, "error: bad pmp granularity '%ld' from the dtb\n", (unsigned long)gran); + abort(); + } + + lg_pmp_granularity = ctz(gran); +} + void processor_t::take_interrupt(reg_t pending_interrupts) { - reg_t mie = get_field(state.mstatus, MSTATUS_MIE); - reg_t m_enabled = state.prv < PRV_M || (state.prv == PRV_M && mie); - reg_t enabled_interrupts = pending_interrupts & ~state.mideleg & -m_enabled; + reg_t enabled_interrupts, deleg, status, mie, m_enabled; + reg_t hsie, hs_enabled, vsie, vs_enabled; - reg_t sie = get_field(state.mstatus, MSTATUS_SIE); - reg_t s_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie); - // M-ints have highest priority; consider S-ints only if no M-ints pending - if (enabled_interrupts == 0) - enabled_interrupts = pending_interrupts & state.mideleg & -s_enabled; + // Do nothing if no pending interrupts + if (!pending_interrupts) { + return; + } + + // M-ints have higher priority over HS-ints and VS-ints + mie = get_field(state.mstatus, MSTATUS_MIE); + m_enabled = state.prv < PRV_M || (state.prv == PRV_M 
&& mie); + enabled_interrupts = pending_interrupts & ~state.mideleg & -m_enabled; + if (enabled_interrupts == 0) { + // HS-ints have higher priority over VS-ints + deleg = state.mideleg & ~MIP_VS_MASK; + status = (state.v) ? state.vsstatus : state.mstatus; + hsie = get_field(status, MSTATUS_SIE); + hs_enabled = state.prv < PRV_S || (state.prv == PRV_S && hsie); + enabled_interrupts = pending_interrupts & deleg & -hs_enabled; + if (state.v && enabled_interrupts == 0) { + // VS-ints have least priority and can only be taken with virt enabled + deleg = state.mideleg & state.hideleg; + vsie = get_field(state.mstatus, MSTATUS_SIE); + vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && vsie); + enabled_interrupts = pending_interrupts & deleg & -vs_enabled; + } + } - if (state.dcsr.cause == 0 && enabled_interrupts) { + if (!state.debug_mode && enabled_interrupts) { // nonstandard interrupts have highest priority if (enabled_interrupts >> IRQ_M_EXT) enabled_interrupts = enabled_interrupts >> IRQ_M_EXT << IRQ_M_EXT; - // external interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MEIP | MIP_SEIP)) - enabled_interrupts = enabled_interrupts & (MIP_MEIP | MIP_SEIP); - // software interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MSIP | MIP_SSIP)) - enabled_interrupts = enabled_interrupts & (MIP_MSIP | MIP_SSIP); - // timer interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MTIP | MIP_STIP)) - enabled_interrupts = enabled_interrupts & (MIP_MTIP | MIP_STIP); + // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI + else if (enabled_interrupts & MIP_MEIP) + enabled_interrupts = MIP_MEIP; + else if (enabled_interrupts & MIP_MSIP) + enabled_interrupts = MIP_MSIP; + else if (enabled_interrupts & MIP_MTIP) + enabled_interrupts = MIP_MTIP; + else if (enabled_interrupts & MIP_SEIP) + enabled_interrupts = MIP_SEIP; + else if (enabled_interrupts & MIP_SSIP) + enabled_interrupts = MIP_SSIP; + else if 
(enabled_interrupts & MIP_STIP) + enabled_interrupts = MIP_STIP; + else if (enabled_interrupts & MIP_VSEIP) + enabled_interrupts = MIP_VSEIP; + else if (enabled_interrupts & MIP_VSSIP) + enabled_interrupts = MIP_VSSIP; + else if (enabled_interrupts & MIP_VSTIP) + enabled_interrupts = MIP_VSTIP; else abort(); @@ -235,7 +576,7 @@ reg_t processor_t::legalize_privilege(reg_t prv) if (!supports_extension('U')) return PRV_M; - if (prv == PRV_H || !supports_extension('S')) + if ((prv == PRV_HS && !supports_extension('H')) || (prv == PRV_S && !supports_extension('S'))) return PRV_U; return prv; @@ -247,8 +588,52 @@ void processor_t::set_privilege(reg_t prv) state.prv = legalize_privilege(prv); } +void processor_t::set_virt(bool virt) +{ + reg_t tmp, mask; + + if (state.prv == PRV_M) + return; + + if (state.v != virt) { + /* + * Ideally, we should flush TLB here but we don't need it because + * set_virt() is always used in conjucter with set_privilege() and + * set_privilege() will flush TLB unconditionally. + */ + if (state.v and !virt) { + /* + * When transitioning from virt-on (VS/VU) to virt-off (HS/M) + * we should sync Guest/VM FS, VS, and XS state with Host FS, + * VS, and XS state. + */ + if ((state.mstatus & SSTATUS_FS) == SSTATUS_FS) { + state.vsstatus |= SSTATUS_FS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + if ((state.mstatus & SSTATUS_VS) == SSTATUS_VS) { + state.vsstatus |= SSTATUS_VS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + if ((state.mstatus & SSTATUS_XS) == SSTATUS_XS) { + state.vsstatus |= SSTATUS_XS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + } + mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ? 
SSTATUS64_SD : SSTATUS32_SD); + tmp = state.mstatus & mask; + state.mstatus = (state.mstatus & ~mask) | (state.vsstatus & mask); + state.vsstatus = tmp; + state.v = virt; + } +} + void processor_t::enter_debug_mode(uint8_t cause) { + state.debug_mode = true; state.dcsr.cause = cause; state.dcsr.prv = state.prv; set_privilege(PRV_M); @@ -259,21 +644,14 @@ void processor_t::enter_debug_mode(uint8_t cause) void processor_t::take_trap(trap_t& t, reg_t epc) { if (debug) { - fprintf(stderr, "core %3d: exception %s, epc 0x%016" PRIx64 "\n", + fprintf(log_file, "core %3d: exception %s, epc 0x%016" PRIx64 "\n", id, t.name(), epc); if (t.has_tval()) - fprintf(stderr, "core %3d: tval 0x%016" PRIx64 "\n", id, - t.get_tval()); - } - - if (trace) { - ust_set_exception(t.cause()); - if (t.has_tval()) - ust_set_tval(t.get_tval()); - ust_set_interrupt(0); + fprintf(log_file, "core %3d: tval 0x%016" PRIx64 "\n", + id, t.get_tval()); } - if (state.dcsr.cause) { + if (state.debug_mode) { if (t.cause() == CAUSE_BREAKPOINT) { state.pc = DEBUG_ROM_ENTRY; } else { @@ -290,41 +668,72 @@ void processor_t::take_trap(trap_t& t, reg_t epc) return; } - // by default, trap to M-mode, unless delegated to S-mode + // By default, trap to M-mode, unless delegated to HS-mode or VS-mode + reg_t vsdeleg, hsdeleg; reg_t bit = t.cause(); - reg_t deleg = state.medeleg; + bool curr_virt = state.v; bool interrupt = (bit & ((reg_t)1 << (max_xlen-1))) != 0; - - if (trace) { - ust_set_interrupt(interrupt); + if (interrupt) { + vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.mideleg & state.hideleg) : 0; + hsdeleg = (state.prv <= PRV_S) ? state.mideleg : 0; + bit &= ~((reg_t)1 << (max_xlen-1)); + } else { + vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.medeleg & state.hedeleg) : 0; + hsdeleg = (state.prv <= PRV_S) ? state.medeleg : 0; } + if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) { + // Handle the trap in VS-mode + reg_t vector = (state.vstvec & 1) && interrupt ? 
4*bit : 0; + state.pc = (state.vstvec & ~(reg_t)1) + vector; + state.vscause = (interrupt) ? (t.cause() - 1) : t.cause(); + state.vsepc = epc; + state.vstval = t.get_tval(); - if (interrupt) - deleg = state.mideleg, bit &= ~((reg_t)1 << (max_xlen-1)); - if (state.prv <= PRV_S && bit < max_xlen && ((deleg >> bit) & 1)) { - // handle the trap in S-mode - state.pc = state.stvec; + reg_t s = state.mstatus; + s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); + s = set_field(s, MSTATUS_SPP, state.prv); + s = set_field(s, MSTATUS_SIE, 0); + set_csr(CSR_MSTATUS, s); + set_privilege(PRV_S); + } else if (state.prv <= PRV_S && bit < max_xlen && ((hsdeleg >> bit) & 1)) { + // Handle the trap in HS-mode + set_virt(false); + reg_t vector = (state.stvec & 1) && interrupt ? 4*bit : 0; + state.pc = (state.stvec & ~(reg_t)1) + vector; state.scause = t.cause(); state.sepc = epc; state.stval = t.get_tval(); + state.htval = t.get_tval2(); + state.htinst = t.get_tinst(); reg_t s = state.mstatus; s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); s = set_field(s, MSTATUS_SPP, state.prv); s = set_field(s, MSTATUS_SIE, 0); set_csr(CSR_MSTATUS, s); + s = state.hstatus; + s = set_field(s, HSTATUS_SPVP, state.prv); + s = set_field(s, HSTATUS_SPV, curr_virt); + s = set_field(s, HSTATUS_GVA, t.has_gva()); + set_csr(CSR_HSTATUS, s); set_privilege(PRV_S); } else { + // Handle the trap in M-mode + set_virt(false); reg_t vector = (state.mtvec & 1) && interrupt ? 
4*bit : 0; state.pc = (state.mtvec & ~(reg_t)1) + vector; state.mepc = epc; state.mcause = t.cause(); state.mtval = t.get_tval(); + state.mtval2 = t.get_tval2(); + state.mtinst = t.get_tinst(); reg_t s = state.mstatus; s = set_field(s, MSTATUS_MPIE, get_field(s, MSTATUS_MIE)); s = set_field(s, MSTATUS_MPP, state.prv); s = set_field(s, MSTATUS_MIE, 0); + s = set_field(s, MSTATUS_MPV, curr_virt); + s = set_field(s, MSTATUS_GVA, t.has_gva()); set_csr(CSR_MSTATUS, s); set_privilege(PRV_M); } @@ -334,22 +743,21 @@ void processor_t::disasm(insn_t insn) { uint64_t bits = insn.bits() & ((1ULL << (8 * insn_length(insn.bits()))) - 1); if (last_pc != state.pc || last_bits != bits) { - if (debug && executions != 1) { - fprintf(stderr, "core %3d: Executed %" PRIx64 " times\n", id, executions); - } - if (debug) { - fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", - id, state.pc, bits, disassembler->disassemble(insn).c_str()); +#ifdef RISCV_ENABLE_COMMITLOG + const char* sym = get_symbol(state.pc); + if (sym != nullptr) + { + fprintf(log_file, "core %3d: >>>> %s\n", id, sym); } +#endif - if (trace) { - ust_step(); - ust_set_addr(state.pc); - ust_set_insn(bits); - ust_set_priv(state.prv); + if (executions != 1) { + fprintf(log_file, "core %3d: Executed %" PRIx64 " times\n", id, executions); } + fprintf(log_file, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", + id, state.pc, bits, disassembler->disassemble(insn).c_str()); last_pc = state.pc; last_bits = bits; executions = 1; @@ -366,26 +774,52 @@ int processor_t::paddr_bits() void processor_t::set_csr(int which, reg_t val) { +#if defined(RISCV_ENABLE_COMMITLOG) +#define LOG_CSR(rd) \ + STATE.log_reg_write[((which) << 4) | 4] = {get_csr(rd), 0}; +#else +#define LOG_CSR(rd) +#endif + val = zext_xlen(val); - reg_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP - | ((ext != NULL) << IRQ_COP); - reg_t all_ints = delegable_ints | MIP_MSIP | MIP_MTIP; + reg_t supervisor_ints = supports_extension('S') ? 
MIP_SSIP | MIP_STIP | MIP_SEIP : 0; + reg_t vssip_int = supports_extension('H') ? MIP_VSSIP : 0; + reg_t hypervisor_ints = supports_extension('H') ? MIP_HS_MASK : 0; + reg_t coprocessor_ints = (ext != NULL) << IRQ_COP; + reg_t delegable_ints = supervisor_ints | coprocessor_ints; + reg_t all_ints = delegable_ints | hypervisor_ints | MIP_MSIP | MIP_MTIP | MIP_MEIP; + + if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.max_pmp) { + // If no PMPs are configured, disallow access to all. Otherwise, allow + // access to all, but unimplemented ones are hardwired to zero. + if (n_pmp == 0) + return; - if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.n_pmp) { size_t i = which - CSR_PMPADDR0; bool locked = state.pmpcfg[i] & PMP_L; - bool next_locked = i+1 < state.n_pmp && (state.pmpcfg[i+1] & PMP_L); - bool next_tor = i+1 < state.n_pmp && (state.pmpcfg[i+1] & PMP_A) == PMP_TOR; - if (!locked && !(next_locked && next_tor)) - state.pmpaddr[i] = val; + bool next_locked = i+1 < state.max_pmp && (state.pmpcfg[i+1] & PMP_L); + bool next_tor = i+1 < state.max_pmp && (state.pmpcfg[i+1] & PMP_A) == PMP_TOR; + if (i < n_pmp && !locked && !(next_locked && next_tor)) { + state.pmpaddr[i] = val & ((reg_t(1) << (MAX_PADDR_BITS - PMP_SHIFT)) - 1); + LOG_CSR(which); + } mmu->flush_tlb(); } - if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.n_pmp / 4) { + if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.max_pmp / 4) { + if (n_pmp == 0) + return; + for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8; i++) { - if (!(state.pmpcfg[i] & PMP_L)) - state.pmpcfg[i] = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L); + if (i < n_pmp && !(state.pmpcfg[i] & PMP_L)) { + uint8_t cfg = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L); + cfg &= ~PMP_W | ((cfg & PMP_R) ? 
PMP_W : 0); // Disallow R=0 W=1 + if (lg_pmp_granularity != PMP_SHIFT && (cfg & PMP_A) == PMP_NA4) + cfg |= PMP_NAPOT; // Disallow A=NA4 when granularity > 4 + state.pmpcfg[i] = cfg; + LOG_CSR(which); + } } mmu->flush_tlb(); } @@ -405,16 +839,30 @@ void processor_t::set_csr(int which, reg_t val) state.fflags = (val & FSR_AEXC) >> FSR_AEXC_SHIFT; state.frm = (val & FSR_RD) >> FSR_RD_SHIFT; break; + case CSR_VCSR: + dirty_vs_state; + VU.vxsat = (val & VCSR_VXSAT) >> VCSR_VXSAT_SHIFT; + VU.vxrm = (val & VCSR_VXRM) >> VCSR_VXRM_SHIFT; + break; case CSR_MSTATUS: { if ((val ^ state.mstatus) & (MSTATUS_MPP | MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_MXR)) mmu->flush_tlb(); - reg_t mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE - | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM - | MSTATUS_MXR | MSTATUS_TW | MSTATUS_TVM - | MSTATUS_TSR | MSTATUS_UXL | MSTATUS_SXL | - (ext ? MSTATUS_XS : 0); + bool has_fs = supports_extension('S') || supports_extension('F') + || supports_extension('V'); + bool has_vs = supports_extension('V'); + bool has_mpv = supports_extension('S') && supports_extension('H'); + bool has_gva = has_mpv; + + reg_t mask = MSTATUS_MIE | MSTATUS_MPIE | MSTATUS_MPRV + | (supports_extension('S') ? (MSTATUS_SUM | MSTATUS_SIE | MSTATUS_SPIE) : 0) + | MSTATUS_MXR | MSTATUS_TW | MSTATUS_TVM | MSTATUS_TSR + | (has_fs ? MSTATUS_FS : 0) + | (has_vs ? MSTATUS_VS : 0) + | (ext ? MSTATUS_XS : 0) + | (has_gva ? MSTATUS_GVA : 0) + | (has_mpv ? 
MSTATUS_MPV : 0); reg_t requested_mpp = legalize_privilege(get_field(val, MSTATUS_MPP)); state.mstatus = set_field(state.mstatus, MSTATUS_MPP, requested_mpp); @@ -425,20 +873,22 @@ void processor_t::set_csr(int which, reg_t val) bool dirty = (state.mstatus & MSTATUS_FS) == MSTATUS_FS; dirty |= (state.mstatus & MSTATUS_XS) == MSTATUS_XS; + dirty |= (state.mstatus & MSTATUS_VS) == MSTATUS_VS; if (max_xlen == 32) state.mstatus = set_field(state.mstatus, MSTATUS32_SD, dirty); else state.mstatus = set_field(state.mstatus, MSTATUS64_SD, dirty); - state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); - state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); - state.mstatus = set_field(state.mstatus, MSTATUS_SXL, xlen_to_uxl(max_xlen)); + if (supports_extension('U')) + state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); + if (supports_extension('S')) + state.mstatus = set_field(state.mstatus, MSTATUS_SXL, xlen_to_uxl(max_xlen)); // U-XLEN == S-XLEN == M-XLEN xlen = max_xlen; break; } case CSR_MIP: { - reg_t mask = MIP_SSIP | MIP_STIP; + reg_t mask = (supervisor_ints | hypervisor_ints) & (MIP_SSIP | MIP_STIP | vssip_int); state.mip = (state.mip & ~mask) | (val & mask); break; } @@ -453,9 +903,17 @@ void processor_t::set_csr(int which, reg_t val) (1 << CAUSE_MISALIGNED_FETCH) | (1 << CAUSE_BREAKPOINT) | (1 << CAUSE_USER_ECALL) | + (1 << CAUSE_SUPERVISOR_ECALL) | (1 << CAUSE_FETCH_PAGE_FAULT) | (1 << CAUSE_LOAD_PAGE_FAULT) | (1 << CAUSE_STORE_PAGE_FAULT); + mask |= supports_extension('H') ? 
+ (1 << CAUSE_VIRTUAL_SUPERVISOR_ECALL) | + (1 << CAUSE_FETCH_GUEST_PAGE_FAULT) | + (1 << CAUSE_LOAD_GUEST_PAGE_FAULT) | + (1 << CAUSE_VIRTUAL_INSTRUCTION) | + (1 << CAUSE_STORE_GUEST_PAGE_FAULT) + : 0; state.medeleg = (state.medeleg & ~mask) | (val & mask); break; } @@ -484,36 +942,85 @@ void processor_t::set_csr(int which, reg_t val) break; case CSR_SSTATUS: { reg_t mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_SPP | SSTATUS_FS - | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR; + | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR + | (supports_extension('V') ? SSTATUS_VS : 0); return set_csr(CSR_MSTATUS, (state.mstatus & ~mask) | (val & mask)); } case CSR_SIP: { - reg_t mask = MIP_SSIP & state.mideleg; - return set_csr(CSR_MIP, (state.mip & ~mask) | (val & mask)); + reg_t mask; + if (state.v) { + mask = state.hideleg & MIP_VSSIP; + val = val << 1; + } else { + mask = state.mideleg & MIP_SSIP; + } + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_SIE: { + reg_t mask; + if (state.v) { + mask = state.hideleg & MIP_VS_MASK; + val = val << 1; + } else { + mask = state.mideleg & ~MIP_HS_MASK; + } + state.mie = (state.mie & ~mask) | (val & mask); + break; } - case CSR_SIE: - return set_csr(CSR_MIE, - (state.mie & ~state.mideleg) | (val & state.mideleg)); case CSR_SATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; mmu->flush_tlb(); if (max_xlen == 32) - state.satp = val & (SATP32_PPN | SATP32_MODE); + reg_val = val & (SATP32_PPN | SATP32_MODE); if (max_xlen == 64 && (get_field(val, SATP64_MODE) == SATP_MODE_OFF || get_field(val, SATP64_MODE) == SATP_MODE_SV39 || get_field(val, SATP64_MODE) == SATP_MODE_SV48)) - state.satp = val & (SATP64_PPN | SATP64_MODE); + reg_val = val & (SATP64_PPN | SATP64_MODE | rv64_ppn_mask); + if (state.v) + state.vsatp = reg_val; + else + state.satp = reg_val; break; } - case CSR_SEPC: state.sepc = val & ~(reg_t)1; break; - case CSR_STVEC: state.stvec = val >> 2 << 2; break; - case 
CSR_SSCRATCH: state.sscratch = val; break; - case CSR_SCAUSE: state.scause = val; break; - case CSR_STVAL: state.stval = val; break; + case CSR_SEPC: + if (state.v) + state.vsepc = val & ~(reg_t)1; + else + state.sepc = val & ~(reg_t)1; + break; + case CSR_STVEC: + if (state.v) + state.vstvec = val & ~(reg_t)2; + else + state.stvec = val & ~(reg_t)2; + break; + case CSR_SSCRATCH: + if (state.v) + state.vsscratch = val; + else + state.sscratch = val; + break; + case CSR_SCAUSE: + if (state.v) + state.vscause = val; + else + state.scause = val; + break; + case CSR_STVAL: + if (state.v) + state.vstval = val; + else + state.stval = val; + break; case CSR_MEPC: state.mepc = val & ~(reg_t)1; break; case CSR_MTVEC: state.mtvec = val & ~(reg_t)2; break; case CSR_MSCRATCH: state.mscratch = val; break; case CSR_MCAUSE: state.mcause = val; break; case CSR_MTVAL: state.mtval = val; break; + case CSR_MTVAL2: state.mtval2 = val; break; + case CSR_MTINST: state.mtinst = val; break; case CSR_MISA: { // the write is ignored if increasing IALIGN would misalign the PC if (!(val & (1L << ('C' - 'A'))) && (state.pc & 2)) @@ -529,9 +1036,118 @@ void processor_t::set_csr(int which, reg_t val) mask |= 1L << ('F' - 'A'); mask |= 1L << ('D' - 'A'); mask |= 1L << ('C' - 'A'); + mask |= 1L << ('H' - 'A'); mask &= max_isa; state.misa = (val & mask) | (state.misa & ~mask); + + // update the forced bits in MIDELEG + if (supports_extension('H')) + state.mideleg |= MIDELEG_FORCED_MASK; + else + state.mideleg &= ~MIDELEG_FORCED_MASK; + break; + } + case CSR_HSTATUS: { + reg_t mask = HSTATUS_VTSR | HSTATUS_VTW | HSTATUS_VTVM | + HSTATUS_HU | HSTATUS_SPVP | HSTATUS_SPV | HSTATUS_GVA; + state.hstatus = (state.hstatus & ~mask) | (val & mask); + break; + } + case CSR_HEDELEG: { + reg_t mask = + (1 << CAUSE_MISALIGNED_FETCH) | + (1 << CAUSE_BREAKPOINT) | + (1 << CAUSE_MISALIGNED_LOAD) | + (1 << CAUSE_LOAD_ACCESS) | + (1 << CAUSE_MISALIGNED_STORE) | + (1 << CAUSE_STORE_ACCESS) | + (1 << CAUSE_USER_ECALL) 
| + (1 << CAUSE_FETCH_PAGE_FAULT) | + (1 << CAUSE_LOAD_PAGE_FAULT) | + (1 << CAUSE_STORE_PAGE_FAULT); + state.hedeleg = (state.hedeleg & ~mask) | (val & mask); + break; + } + case CSR_HIDELEG: { + reg_t mask = MIP_VS_MASK; + state.hideleg = (state.hideleg & ~mask) | (val & mask); + break; + } + case CSR_HIE: { + reg_t mask = MIP_HS_MASK; + state.mie = (state.mie & ~mask) | (val & mask); + break; + } + case CSR_HCOUNTEREN: + state.hcounteren = val; + break; + case CSR_HGEIE: + /* Ignore */ + break; + case CSR_HTVAL: + state.htval = val; /* get_csr reads HTVAL from state.htval; HTINST has its own case below */ + break; + case CSR_HIP: { + reg_t mask = MIP_VSSIP; + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_HVIP: { + reg_t mask = MIP_VS_MASK; + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_HTINST: + state.htinst = val; + break; + case CSR_HGATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; + mmu->flush_tlb(); + if (max_xlen == 32) + reg_val = val & (HGATP32_PPN | HGATP32_MODE); + if (max_xlen == 64 && (get_field(val, HGATP64_MODE) == HGATP_MODE_OFF || + get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4 || + get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4)) + reg_val = val & (HGATP64_PPN | HGATP64_MODE | rv64_ppn_mask); + state.hgatp = reg_val; + break; + } + case CSR_VSSTATUS: { + reg_t mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ?
SSTATUS64_SD : SSTATUS32_SD); + state.vsstatus = (state.vsstatus & ~mask) | (val & mask); + break; + } + case CSR_VSIE: { + reg_t mask = state.hideleg & MIP_VS_MASK; + state.mie = (state.mie & ~mask) | ((val << 1) & mask); + break; + } + case CSR_VSTVEC: state.vstvec = val & ~(reg_t)2; break; + case CSR_VSSCRATCH: state.vsscratch = val; break; + case CSR_VSEPC: state.vsepc = val & ~(reg_t)1; break; + case CSR_VSCAUSE: state.vscause = val; break; + case CSR_VSTVAL: state.vstval = val; break; + case CSR_VSIP: { + reg_t mask = state.hideleg & MIP_VSSIP; + state.mip = (state.mip & ~mask) | ((val << 1) & mask); + break; + } + case CSR_VSATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; + mmu->flush_tlb(); + if (max_xlen == 32) + reg_val = val & (SATP32_PPN | SATP32_MODE); + if (max_xlen == 64 && (get_field(val, SATP64_MODE) == SATP_MODE_OFF || + get_field(val, SATP64_MODE) == SATP_MODE_SV39 || + get_field(val, SATP64_MODE) == SATP_MODE_SV48)) + reg_val = val & (SATP64_PPN | SATP64_MODE | rv64_ppn_mask); + state.vsatp = reg_val; break; } case CSR_TSELECT: @@ -542,7 +1158,7 @@ void processor_t::set_csr(int which, reg_t val) case CSR_TDATA1: { mcontrol_t *mc = &state.mcontrol[state.tselect]; - if (mc->dmode && !state.dcsr.cause) { + if (mc->dmode && !state.debug_mode) { break; } mc->dmode = get_field(val, MCONTROL_DMODE(xlen)); @@ -565,7 +1181,7 @@ void processor_t::set_csr(int which, reg_t val) } break; case CSR_TDATA2: - if (state.mcontrol[state.tselect].dmode && !state.dcsr.cause) { + if (state.mcontrol[state.tselect].dmode && !state.debug_mode) { break; } if (state.tselect < state.num_triggers) { @@ -585,47 +1201,161 @@ void processor_t::set_csr(int which, reg_t val) case CSR_DPC: state.dpc = val & ~(reg_t)1; break; - case CSR_DSCRATCH: - state.dscratch = val; + case CSR_DSCRATCH0: + state.dscratch0 = val; + break; + case CSR_DSCRATCH1: + state.dscratch1 = val; + break; + case CSR_VSTART: + dirty_vs_state; + VU.vstart = 
val & (VU.get_vlen() - 1); + break; + case CSR_VXSAT: + dirty_vs_state; + VU.vxsat = val & 0x1ul; + break; + case CSR_VXRM: + dirty_vs_state; + VU.vxrm = val & 0x3ul; + break; + } + +#if defined(RISCV_ENABLE_COMMITLOG) + switch (which) + { + case CSR_FFLAGS: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FFLAGS); + break; + case CSR_FRM: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FRM); + break; + case CSR_FCSR: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FFLAGS); + LOG_CSR(CSR_FRM); + break; + case CSR_VCSR: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXSAT); + LOG_CSR(CSR_VXRM); + break; + + case CSR_VSTART: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VSTART); + break; + case CSR_VXSAT: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXSAT); + break; + case CSR_VXRM: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXRM); + break; + + case CSR_SSTATUS: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_SSTATUS); + break; + case CSR_SIP: + LOG_CSR(CSR_MIP); + LOG_CSR(CSR_SIP); + break; + case CSR_SIE: + LOG_CSR(CSR_MIE); + LOG_CSR(CSR_SIE); + break; + + case CSR_MSTATUS: + case CSR_MIP: + case CSR_MIE: + case CSR_MIDELEG: + case CSR_MEDELEG: + case CSR_MINSTRET: + case CSR_MCYCLE: + case CSR_MINSTRETH: + case CSR_MCYCLEH: + case CSR_SCOUNTEREN: + case CSR_MCOUNTEREN: + case CSR_SATP: + case CSR_SEPC: + case CSR_STVEC: + case CSR_SSCRATCH: + case CSR_SCAUSE: + case CSR_STVAL: + case CSR_MEPC: + case CSR_MTVEC: + case CSR_MSCRATCH: + case CSR_MCAUSE: + case CSR_MTVAL: + case CSR_MISA: + case CSR_TSELECT: + case CSR_TDATA1: + case CSR_TDATA2: + case CSR_DCSR: + case CSR_DPC: + case CSR_DSCRATCH0: + case CSR_DSCRATCH1: + LOG_CSR(which); break; } +#endif } // Note that get_csr is sometimes called when read side-effects should not // be actioned. In other words, Spike cannot currently support CSRs with // side effects on reads. 
-reg_t processor_t::get_csr(int which) +reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek) { uint32_t ctr_en = -1; if (state.prv < PRV_M) ctr_en &= state.mcounteren; + if (state.v) + ctr_en &= state.hcounteren; if (state.prv < PRV_S) ctr_en &= state.scounteren; bool ctr_ok = (ctr_en >> (which & 31)) & 1; + reg_t res = 0; +#define ret(n) do { \ + res = (n); \ + goto out; \ + } while (false) + if (ctr_ok) { if (which >= CSR_HPMCOUNTER3 && which <= CSR_HPMCOUNTER31) - return 0; + ret(0); if (xlen == 32 && which >= CSR_HPMCOUNTER3H && which <= CSR_HPMCOUNTER31H) - return 0; + ret(0); } if (which >= CSR_MHPMCOUNTER3 && which <= CSR_MHPMCOUNTER31) - return 0; + ret(0); if (xlen == 32 && which >= CSR_MHPMCOUNTER3H && which <= CSR_MHPMCOUNTER31H) - return 0; + ret(0); if (which >= CSR_MHPMEVENT3 && which <= CSR_MHPMEVENT31) - return 0; - - if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.n_pmp) - return state.pmpaddr[which - CSR_PMPADDR0]; + ret(0); + + if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.max_pmp) { + // If n_pmp is zero, that means pmp is not implemented hence raise trap if it tries to access the csr + if (n_pmp == 0) + goto throw_illegal; + reg_t i = which - CSR_PMPADDR0; + if ((state.pmpcfg[i] & PMP_A) >= PMP_NAPOT) + ret(state.pmpaddr[i] | (~pmp_tor_mask() >> 1)); + else + ret(state.pmpaddr[i] & pmp_tor_mask()); + } - if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.n_pmp / 4) { + if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.max_pmp / 4) { require((which & ((xlen / 32) - 1)) == 0); - reg_t res = 0; - for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8 && i < state.n_pmp; i++) - res |= reg_t(state.pmpcfg[i]) << (8 * (i - i0)); - return res; + reg_t cfg_res = 0; + for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8 && i < state.max_pmp; i++) + cfg_res |= reg_t(state.pmpcfg[i]) << (8 * (i - i0)); + ret(cfg_res); } switch (which) @@ -634,76 +1364,191 @@ reg_t 
processor_t::get_csr(int which) require_fp; if (!supports_extension('F')) break; - return state.fflags; + ret(state.fflags); case CSR_FRM: require_fp; if (!supports_extension('F')) break; - return state.frm; + ret(state.frm); case CSR_FCSR: require_fp; if (!supports_extension('F')) break; - return (state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT); + ret((state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT)); + case CSR_VCSR: + require_vector_vs; + if (!supports_extension('V')) + break; + ret((VU.vxsat << VCSR_VXSAT_SHIFT) | (VU.vxrm << VCSR_VXRM_SHIFT)); case CSR_INSTRET: case CSR_CYCLE: if (ctr_ok) - return state.minstret; + ret(state.minstret); + if (state.v && + ((state.mcounteren >> (which & 31)) & 1) && + !((state.hcounteren >> (which & 31)) & 1)) { + goto throw_virtual; + } break; case CSR_MINSTRET: case CSR_MCYCLE: - return state.minstret; + ret(state.minstret); case CSR_INSTRETH: case CSR_CYCLEH: if (ctr_ok && xlen == 32) - return state.minstret >> 32; + ret(state.minstret >> 32); + if (state.v && + ((state.mcounteren >> (which & 31)) & 1) && + !((state.hcounteren >> (which & 31)) & 1)) { + goto throw_virtual; + } break; case CSR_MINSTRETH: case CSR_MCYCLEH: if (xlen == 32) - return state.minstret >> 32; + ret(state.minstret >> 32); break; - case CSR_SCOUNTEREN: return state.scounteren; - case CSR_MCOUNTEREN: return state.mcounteren; + case CSR_SCOUNTEREN: ret(state.scounteren); + case CSR_MCOUNTEREN: + if (!supports_extension('U')) + break; + ret(state.mcounteren); + case CSR_MCOUNTINHIBIT: ret(0); case CSR_SSTATUS: { reg_t mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_SPP | SSTATUS_FS + | (supports_extension('V') ? SSTATUS_VS : 0) | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR | SSTATUS_UXL; reg_t sstatus = state.mstatus & mask; if ((sstatus & SSTATUS_FS) == SSTATUS_FS || (sstatus & SSTATUS_XS) == SSTATUS_XS) sstatus |= (xlen == 32 ? 
SSTATUS32_SD : SSTATUS64_SD); - return sstatus; + ret(sstatus); } - case CSR_SIP: return state.mip & state.mideleg; - case CSR_SIE: return state.mie & state.mideleg; - case CSR_SEPC: return state.sepc & pc_alignment_mask(); - case CSR_STVAL: return state.stval; - case CSR_STVEC: return state.stvec; - case CSR_SCAUSE: - if (max_xlen > xlen) - return state.scause | ((state.scause >> (max_xlen-1)) << (xlen-1)); - return state.scause; - case CSR_SATP: - if (get_field(state.mstatus, MSTATUS_TVM)) + case CSR_SIP: { + if (state.v) { + ret((state.mip & state.hideleg & MIP_VS_MASK) >> 1); + } else { + ret(state.mip & state.mideleg & ~MIP_HS_MASK); + } + } + case CSR_SIE: { + if (state.v) { + ret((state.mie & state.hideleg & MIP_VS_MASK) >> 1); + } else { + ret(state.mie & state.mideleg & ~MIP_HS_MASK); + } + } + case CSR_SEPC: { + if (state.v) { + ret(state.vsepc & pc_alignment_mask()); + } else { + ret(state.sepc & pc_alignment_mask()); + } + } + case CSR_STVAL: { + if (state.v) { + ret(state.vstval); + } else { + ret(state.stval); + } + } + case CSR_STVEC: { + if (state.v) { + ret(state.vstvec); + } else { + ret(state.stvec); + } + } + case CSR_SCAUSE: { + if (state.v) { + if (max_xlen > xlen) + ret(state.vscause | ((state.vscause >> (max_xlen-1)) << (xlen-1))); + ret(state.vscause); + } else { + if (max_xlen > xlen) + ret(state.scause | ((state.scause >> (max_xlen-1)) << (xlen-1))); + ret(state.scause); + } + } + case CSR_SATP: { + if (state.v) { + if (get_field(state.hstatus, HSTATUS_VTVM)) + goto throw_virtual; + ret(state.vsatp); + } else { + if (get_field(state.mstatus, MSTATUS_TVM)) + require_privilege(PRV_M); + ret(state.satp); + } + } + case CSR_SSCRATCH: { + if (state.v) { + ret(state.vsscratch); + } else { + ret(state.sscratch); + } + } + case CSR_MSTATUS: ret(state.mstatus); + case CSR_MIP: ret(state.mip); + case CSR_MIE: ret(state.mie); + case CSR_MEPC: ret(state.mepc & pc_alignment_mask()); + case CSR_MSCRATCH: ret(state.mscratch); + case CSR_MCAUSE: 
ret(state.mcause); + case CSR_MTVAL: ret(state.mtval); + case CSR_MTVAL2: + if (supports_extension('H')) + ret(state.mtval2); + break; + case CSR_MTINST: + if (supports_extension('H')) + ret(state.mtinst); + break; + case CSR_MISA: ret(state.misa); + case CSR_MARCHID: ret(5); + case CSR_MIMPID: ret(0); + case CSR_MVENDORID: ret(0); + case CSR_MHARTID: ret(id); + case CSR_MTVEC: ret(state.mtvec); + case CSR_MEDELEG: + if (!supports_extension('S')) + break; + ret(state.medeleg); + case CSR_MIDELEG: + if (!supports_extension('S')) + break; + ret(state.mideleg); + case CSR_HSTATUS: ret(state.hstatus); + case CSR_HEDELEG: ret(state.hedeleg); + case CSR_HIDELEG: ret(state.hideleg); + case CSR_HIE: ret(state.mie & MIP_HS_MASK); + case CSR_HCOUNTEREN: ret(state.hcounteren); + case CSR_HGEIE: ret(0); + case CSR_HTVAL: ret(state.htval); + case CSR_HIP: ret(state.mip & MIP_HS_MASK); + case CSR_HVIP: ret(state.mip & MIP_VS_MASK); + case CSR_HTINST: ret(state.htinst); + case CSR_HGATP: { + if (!state.v && get_field(state.mstatus, MSTATUS_TVM)) require_privilege(PRV_M); - return state.satp; - case CSR_SSCRATCH: return state.sscratch; - case CSR_MSTATUS: return state.mstatus; - case CSR_MIP: return state.mip; - case CSR_MIE: return state.mie; - case CSR_MEPC: return state.mepc & pc_alignment_mask(); - case CSR_MSCRATCH: return state.mscratch; - case CSR_MCAUSE: return state.mcause; - case CSR_MTVAL: return state.mtval; - case CSR_MISA: return state.misa; - case CSR_MARCHID: return 5; - case CSR_MIMPID: return 0; - case CSR_MVENDORID: return 0; - case CSR_MHARTID: return id; - case CSR_MTVEC: return state.mtvec; - case CSR_MEDELEG: return state.medeleg; - case CSR_MIDELEG: return state.mideleg; - case CSR_TSELECT: return state.tselect; + ret(state.hgatp); + } + case CSR_HGEIP: ret(0); + case CSR_VSSTATUS: { + reg_t mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ? 
SSTATUS64_SD : SSTATUS32_SD); + ret(state.vsstatus & mask); + } + case CSR_VSIE: ret((state.mie & state.hideleg & MIP_VS_MASK) >> 1); + case CSR_VSTVEC: ret(state.vstvec); + case CSR_VSSCRATCH: ret(state.vsscratch); + case CSR_VSEPC: ret(state.vsepc & pc_alignment_mask()); + case CSR_VSCAUSE: ret(state.vscause); + case CSR_VSTVAL: ret(state.vstval); + case CSR_VSIP: ret((state.mip & state.hideleg & MIP_VS_MASK) >> 1); + case CSR_VSATP: ret(state.vsatp); + case CSR_TSELECT: ret(state.tselect); case CSR_TDATA1: if (state.tselect < state.num_triggers) { reg_t v = 0; @@ -723,21 +1568,23 @@ reg_t processor_t::get_csr(int which) v = set_field(v, MCONTROL_EXECUTE, mc->execute); v = set_field(v, MCONTROL_STORE, mc->store); v = set_field(v, MCONTROL_LOAD, mc->load); - return v; + ret(v); } else { - return 0; + ret(0); } break; case CSR_TDATA2: if (state.tselect < state.num_triggers) { - return state.tdata2[state.tselect]; + ret(state.tdata2[state.tselect]); } else { - return 0; + ret(0); } break; - case CSR_TDATA3: return 0; + case CSR_TDATA3: ret(0); case CSR_DCSR: { + if (!state.debug_mode) + break; uint32_t v = 0; v = set_field(v, DCSR_XDEBUGVER, 1); v = set_field(v, DCSR_EBREAKM, state.dcsr.ebreakm); @@ -749,19 +1596,90 @@ reg_t processor_t::get_csr(int which) v = set_field(v, DCSR_CAUSE, state.dcsr.cause); v = set_field(v, DCSR_STEP, state.dcsr.step); v = set_field(v, DCSR_PRV, state.dcsr.prv); - return v; + ret(v); } case CSR_DPC: - return state.dpc & pc_alignment_mask(); - case CSR_DSCRATCH: - return state.dscratch; + if (!state.debug_mode) + break; + ret(state.dpc & pc_alignment_mask()); + case CSR_DSCRATCH0: + if (!state.debug_mode) + break; + ret(state.dscratch0); + case CSR_DSCRATCH1: + if (!state.debug_mode) + break; + ret(state.dscratch1); + case CSR_VSTART: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vstart); + case CSR_VXSAT: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vxsat); + case CSR_VXRM: + 
require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vxrm); + case CSR_VL: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vl); + case CSR_VTYPE: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vtype); + case CSR_VLENB: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vlenb); } - throw trap_illegal_instruction(0); + +#undef ret + + // If we get here, the CSR doesn't exist. Unimplemented CSRs always throw + // illegal-instruction exceptions, not virtual-instruction exceptions. +throw_illegal: + throw trap_illegal_instruction(insn.bits()); + +throw_virtual: + throw trap_virtual_instruction(insn.bits()); + +out: + // Check permissions. Raise virtual-instruction exception if V=1, + // privileges are insufficient, and the CSR belongs to supervisor or + // hypervisor. Raise illegal-instruction exception otherwise. + + if (peek) + return res; + + unsigned csr_priv = get_field(which, 0x300); + bool csr_read_only = get_field(which, 0xC00) == 3; + unsigned priv = state.prv == PRV_S && !state.v ? 
PRV_HS : state.prv; + + if ((csr_priv == PRV_S && !supports_extension('S')) || + (csr_priv == PRV_HS && !supports_extension('H'))) + goto throw_illegal; + + if ((write && csr_read_only) || priv < csr_priv) { + if (state.v && csr_priv <= PRV_HS) + goto throw_virtual; + goto throw_illegal; + } + + return res; } reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc) { - throw trap_illegal_instruction(0); + throw trap_illegal_instruction(insn.bits()); } insn_func_t processor_t::decode_insn(insn_t insn) @@ -818,8 +1736,11 @@ void processor_t::register_extension(extension_t* x) for (auto insn : x->get_instructions()) register_insn(insn); build_opcode_map(); - for (auto disasm_insn : x->get_disasms()) - disassembler->add_insn(disasm_insn); + + if (disassembler) + for (auto disasm_insn : x->get_disasms()) + disassembler->add_insn(disasm_insn); + if (ext != NULL) throw std::logic_error("only one extension may be registered"); ext = x; diff --git a/riscv/processor.h b/riscv/processor.h index 754d3e65f1..87df69f593 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -8,7 +8,9 @@ #include "trap.h" #include #include +#include #include +#include #include "debug_rom_defines.h" class processor_t; @@ -27,11 +29,11 @@ struct insn_desc_t insn_func_t rv64; }; -struct commit_log_reg_t -{ - reg_t addr; - freg_t data; -}; +// regnum, data +typedef std::unordered_map commit_log_reg_t; + +// addr, value, size +typedef std::vector> commit_log_mem_t; typedef struct { @@ -83,6 +85,68 @@ typedef struct bool load; } mcontrol_t; +enum VRM{ + RNU = 0, + RNE, + RDN, + ROD, + INVALID_RM +}; + +template +struct type_usew_t; + +template<> +struct type_usew_t<8> +{ + using type=uint8_t; +}; + +template<> +struct type_usew_t<16> +{ + using type=uint16_t; +}; + +template<> +struct type_usew_t<32> +{ + using type=uint32_t; +}; + +template<> +struct type_usew_t<64> +{ + using type=uint64_t; +}; + +template +struct type_sew_t; + +template<> +struct type_sew_t<8> +{ + using type=int8_t; +}; 
+ +template<> +struct type_sew_t<16> +{ + using type=int16_t; +}; + +template<> +struct type_sew_t<32> +{ + using type=int32_t; +}; + +template<> +struct type_sew_t<64> +{ + using type=int64_t; +}; + // architectural state of a RISC-V hart struct state_t { @@ -96,6 +160,7 @@ struct state_t // control and status registers reg_t prv; // TODO: Can this be an enum instead? + bool v; reg_t misa; reg_t mstatus; reg_t mepc; @@ -116,16 +181,35 @@ struct state_t reg_t stvec; reg_t satp; reg_t scause; + + reg_t mtval2; + reg_t mtinst; + reg_t hstatus; + reg_t hideleg; + reg_t hedeleg; + uint32_t hcounteren; + reg_t htval; + reg_t htinst; + reg_t hgatp; + reg_t vsstatus; + reg_t vstvec; + reg_t vsscratch; + reg_t vsepc; + reg_t vscause; + reg_t vstval; + reg_t vsatp; + reg_t dpc; - reg_t dscratch; + reg_t dscratch0, dscratch1; dcsr_t dcsr; reg_t tselect; mcontrol_t mcontrol[num_triggers]; reg_t tdata2[num_triggers]; + bool debug_mode; - static const int n_pmp = 16; - uint8_t pmpcfg[n_pmp]; - reg_t pmpaddr[n_pmp]; + static const int max_pmp = 16; + uint8_t pmpcfg[max_pmp]; + reg_t pmpaddr[max_pmp]; uint32_t fflags; uint32_t frm; @@ -141,6 +225,8 @@ struct state_t #ifdef RISCV_ENABLE_COMMITLOG commit_log_reg_t log_reg_write; + commit_log_mem_t log_mem_read; + commit_log_mem_t log_mem_write; reg_t last_inst_priv; int last_inst_xlen; int last_inst_flen; @@ -153,6 +239,12 @@ typedef enum { OPERATION_LOAD, } trigger_operation_t; +typedef enum { + // 65('A') ~ 90('Z') is reserved for standard isa in misa + EXT_ZFH = 0, + EXT_ZVEDIV, +} isa_extension_t; + // Count number of contiguous 1 bits starting from the LSB. 
static int cto(reg_t val) { @@ -166,16 +258,22 @@ static int cto(reg_t val) class processor_t : public abstract_device_t { public: - processor_t(const char* isa, simif_t* sim, uint32_t id, bool halt_on_reset=false); + processor_t(const char* isa, const char* priv, const char* varch, + simif_t* sim, uint32_t id, bool halt_on_reset, + FILE *log_file); ~processor_t(); void set_debug(bool value); - void set_trace(bool value); void set_histogram(bool value); +#ifdef RISCV_ENABLE_COMMITLOG + void enable_log_commits(); + bool get_log_commits_enabled() const { return log_commits_enabled; } +#endif void reset(); void step(size_t n); // run for n cycles void set_csr(int which, reg_t val); - reg_t get_csr(int which); + reg_t get_csr(int which, insn_t insn, bool write, bool peek = 0); + reg_t get_csr(int which) { return get_csr(which, insn_t(0), false, true); } mmu_t* get_mmu() { return mmu; } state_t* get_state() { return &state; } unsigned get_xlen() { return xlen; } @@ -188,21 +286,26 @@ class processor_t : public abstract_device_t } extension_t* get_extension() { return ext; } bool supports_extension(unsigned char ext) { - if (ext >= 'a' && ext <= 'z') ext += 'A' - 'a'; - return ext >= 'A' && ext <= 'Z' && ((state.misa >> (ext - 'A')) & 1); + if (ext >= 'A' && ext <= 'Z') + return ((state.misa >> (ext - 'A')) & 1); + else + return extension_table[ext]; } reg_t pc_alignment_mask() { return ~(reg_t)(supports_extension('C') ? 
0 : 2); } void check_pc_alignment(reg_t pc) { if (unlikely(pc & ~pc_alignment_mask())) - throw trap_instruction_address_misaligned(pc); + throw trap_instruction_address_misaligned(pc, 0, 0); } reg_t legalize_privilege(reg_t); void set_privilege(reg_t); + void set_virt(bool); void update_histogram(reg_t pc); const disassembler_t* get_disassembler() { return disassembler; } + FILE *get_log_file() { return log_file; } + void register_insn(insn_desc_t); void register_extension(extension_t*); @@ -212,17 +315,19 @@ class processor_t : public abstract_device_t // When true, display disassembly of each instruction that's executed. bool debug; - // When true, write the UST trace - bool trace; // When true, take the slow simulation path. bool slow_path(); - bool halted() { return state.dcsr.cause ? true : false; } - bool halt_request; + bool halted() { return state.debug_mode; } + enum { + HR_NONE, /* Halt request is inactive. */ + HR_REGULAR, /* Regular halt request/debug interrupt. */ + HR_GROUP /* Halt requested due to halt group. */ + } halt_request; // Return the index of a trigger that matched, or -1. 
inline int trigger_match(trigger_operation_t operation, reg_t address, reg_t data) { - if (state.dcsr.cause) + if (state.debug_mode) return -1; bool chain_ok = true; @@ -262,7 +367,7 @@ class processor_t : public abstract_device_t break; case MATCH_NAPOT: { - reg_t mask = ~((1 << cto(state.tdata2[i])) - 1); + reg_t mask = ~((1 << (cto(state.tdata2[i])+1)) - 1); if ((value & mask) != (state.tdata2[i] & mask)) continue; } @@ -301,6 +406,11 @@ class processor_t : public abstract_device_t void trigger_updated(); + void set_pmp_num(reg_t pmp_num); + void set_pmp_granularity(reg_t pmp_granularity); + + const char* get_symbol(uint64_t addr); + private: simif_t* sim; mmu_t* mmu; // main memory is always accessed via the mmu @@ -313,7 +423,11 @@ class processor_t : public abstract_device_t reg_t max_isa; std::string isa_string; bool histogram_enabled; + bool log_commits_enabled; + FILE *log_file; bool halt_on_reset; + std::vector extension_table; + std::vector instructions; std::map pc_histogram; @@ -327,19 +441,90 @@ class processor_t : public abstract_device_t void disasm(insn_t insn); // disassemble and print an instruction int paddr_bits(); + reg_t pmp_tor_mask() { return -(reg_t(1) << (lg_pmp_granularity - PMP_SHIFT)); } + void enter_debug_mode(uint8_t cause); friend class mmu_t; friend class clint_t; friend class extension_t; - void parse_isa_string(const char* isa); + void parse_varch_string(const char*); + void parse_priv_string(const char*); + void parse_isa_string(const char*); void build_opcode_map(); void register_base_instructions(); insn_func_t decode_insn(insn_t insn); // Track repeated executions for processor_t::disasm() uint64_t last_pc, last_bits, executions; + reg_t n_pmp; + reg_t lg_pmp_granularity; + +public: + class vectorUnit_t { + public: + processor_t* p; + void *reg_file; + char reg_referenced[NVPR]; + int setvl_count; + reg_t vlmax; + reg_t vstart, vxrm, vxsat, vl, vtype, vlenb; + reg_t vma, vta; + reg_t vediv, vsew; + float vflmul; + reg_t ELEN, 
VLEN; + bool vill; + bool vstart_alu; + + // vector element for varies SEW + template + T& elt(reg_t vReg, reg_t n, bool is_write = false){ + assert(vsew != 0); + assert((VLEN >> 3)/sizeof(T) > 0); + reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T)); + vReg += n / elts_per_reg; + n = n % elts_per_reg; +#ifdef WORDS_BIGENDIAN + // "V" spec 0.7.1 requires lower indices to map to lower significant + // bits when changing SEW, thus we need to index from the end on BE. + n ^= elts_per_reg - 1; +#endif + reg_referenced[vReg] = 1; + +#ifdef RISCV_ENABLE_COMMITLOG + if (is_write) + p->get_state()->log_reg_write[((vReg) << 4) | 2] = {0, 0}; +#endif + + T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3)); + return regStart[n]; + } + public: + + void reset(); + + vectorUnit_t(){ + reg_file = 0; + } + + ~vectorUnit_t(){ + free(reg_file); + reg_file = 0; + } + + reg_t set_vl(int rd, int rs1, reg_t reqVL, reg_t newType); + + reg_t get_vlen() { return VLEN; } + reg_t get_elen() { return ELEN; } + reg_t get_slen() { return VLEN; } + + VRM get_vround_mode() { + return (VRM)vxrm; + } + }; + + vectorUnit_t VU; }; reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc); diff --git a/riscv/remote_bitbang.cc b/riscv/remote_bitbang.cc index 21306dd166..8453e85abd 100644 --- a/riscv/remote_bitbang.cc +++ b/riscv/remote_bitbang.cc @@ -5,6 +5,13 @@ #include #include +#ifndef AF_INET +#include +#endif +#ifndef INADDR_ANY +#include +#endif + #include #include #include diff --git a/riscv/riscv.ac b/riscv/riscv.ac index 68bcdb55d1..64693e9144 100644 --- a/riscv/riscv.ac +++ b/riscv/riscv.ac @@ -6,21 +6,24 @@ AC_ARG_WITH(isa, AC_DEFINE_UNQUOTED([DEFAULT_ISA], "$withval", [Default value for --isa switch]), AC_DEFINE_UNQUOTED([DEFAULT_ISA], "RV64IMAFDC", [Default value for --isa switch])) -AC_SEARCH_LIBS([dlopen], [dl dld], [], [ - AC_MSG_ERROR([unable to find the dlopen() function]) +AC_ARG_WITH(priv, + [AS_HELP_STRING([--with-priv=MSU], + [Sets the default RISC-V privilege modes 
supported])], + AC_DEFINE_UNQUOTED([DEFAULT_PRIV], "$withval", [Default value for --priv switch]), + AC_DEFINE_UNQUOTED([DEFAULT_PRIV], "MSU", [Default value for --priv switch])) + +AC_ARG_WITH(varch, + [AS_HELP_STRING([--with-varch=vlen:128,elen:64,slen:128], + [Sets the default vector config])], + AC_DEFINE_UNQUOTED([DEFAULT_VARCH], "$withval", [Default value for --varch switch]), + AC_DEFINE_UNQUOTED([DEFAULT_VARCH], ["vlen:128,elen:64,slen:128"], [Default value for --varch switch])) + + +AC_SEARCH_LIBS([dlopen], [dl dld], [ + AC_DEFINE([HAVE_DLOPEN], [], [Dynamic library loading is supported]), + AC_SUBST([HAVE_DLOPEN], [yes]) ]) -AC_ARG_WITH([fesvr], - [AS_HELP_STRING([--with-fesvr], - [path to your fesvr installation if not in a standard location])], - [ - LDFLAGS="-L$withval/lib $LDFLAGS" - CPPFLAGS="-I$withval/include $CPPFLAGS" - ] -) - -AC_CHECK_LIB(fesvr, libfesvr_is_present, [], [AC_MSG_ERROR([libfesvr is required])], [-pthread]) - AC_CHECK_LIB(pthread, pthread_create, [], [AC_MSG_ERROR([libpthread is required])]) AC_ARG_ENABLE([commitlog], AS_HELP_STRING([--enable-commitlog], [Enable commit log generation])) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index dc23eb893b..49d4a821f6 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -2,10 +2,13 @@ get_insn_list = $(shell grep ^DECLARE_INSN $(1) | sed 's/DECLARE_INSN(\(.*\),.*, get_opcode = $(shell grep ^DECLARE_INSN.*\\\<$(2)\\\> $(1) | sed 's/DECLARE_INSN(.*,\(.*\),.*)/\1/') riscv_subproject_deps = \ + fdt \ softfloat \ riscv_install_prog_srcs = \ +riscv_CFLAGS = -fPIC + riscv_hdrs = \ common.h \ decode.h \ @@ -20,16 +23,17 @@ riscv_hdrs = \ encoding.h \ cachesim.h \ memtracer.h \ + mmio_plugin.h \ tracer.h \ extension.h \ rocc.h \ insn_template.h \ - mulhi.h \ debug_module.h \ debug_rom_defines.h \ remote_bitbang.h \ jtag_dtm.h \ - ust_tracer.h \ + +riscv_install_hdrs = mmio_plugin.h riscv_precompiled_hdrs = \ insn_template.h \ @@ -47,14 +51,12 @@ riscv_srcs = \ extension.cc \ 
extensions.cc \ rocc.cc \ - regnames.cc \ devices.cc \ rom.cc \ clint.cc \ debug_module.cc \ remote_bitbang.cc \ jtag_dtm.cc \ - ust_tracer.cc \ $(riscv_gen_srcs) \ riscv_test_srcs = @@ -63,11 +65,61 @@ riscv_gen_hdrs = \ icache.h \ insn_list.h \ -riscv_insn_list = \ + +riscv_insn_ext_i = \ add \ addi \ addiw \ addw \ + and \ + andi \ + auipc \ + beq \ + bge \ + bgeu \ + blt \ + bltu \ + bne \ + jal \ + jalr \ + lb \ + lbu \ + ld \ + lh \ + lhu \ + lui \ + lw \ + lwu \ + or \ + ori \ + sb \ + sd \ + sh \ + sll \ + slli \ + slliw \ + sllw \ + slt \ + slti \ + sltiu \ + sltu \ + sra \ + srai \ + sraiw \ + sraw \ + srl \ + srli \ + srliw \ + srlw \ + sub \ + subw \ + sw \ + xor \ + xori \ + fence \ + fence_i \ + +riscv_insn_ext_a = \ amoadd_d \ amoadd_w \ amoand_d \ @@ -86,18 +138,15 @@ riscv_insn_list = \ amoswap_w \ amoxor_d \ amoxor_w \ - and \ - andi \ - auipc \ - beq \ - bge \ - bgeu \ - blt \ - bltu \ - bne \ + lr_d \ + lr_w \ + sc_d \ + sc_w \ + +riscv_insn_ext_c = \ c_add \ - c_addi4spn \ c_addi \ + c_addi4spn \ c_addw \ c_and \ c_andi \ @@ -112,9 +161,9 @@ riscv_insn_list = \ c_fsdsp \ c_fsw \ c_fswsp \ + c_j \ c_jal \ c_jalr \ - c_j \ c_jr \ c_li \ c_lui \ @@ -127,28 +176,60 @@ riscv_insn_list = \ c_srli \ c_sub \ c_subw \ - c_xor \ - csrrc \ - csrrci \ - csrrs \ - csrrsi \ - csrrw \ - csrrwi \ c_sw \ c_swsp \ + c_xor \ + +riscv_insn_ext_m = \ div \ divu \ divuw \ divw \ - dret \ - ebreak \ - ecall \ - fadd_d \ - fadd_q \ + mul \ + mulh \ + mulhsu \ + mulhu \ + mulw \ + rem \ + remu \ + remuw \ + remw \ + +riscv_insn_ext_f = \ fadd_s \ - fclass_d \ - fclass_q \ fclass_s \ + fcvt_l_s \ + fcvt_lu_s \ + fcvt_s_l \ + fcvt_s_lu \ + fcvt_s_w \ + fcvt_s_wu \ + fcvt_w_s \ + fcvt_wu_s \ + fdiv_s \ + feq_s \ + fle_s \ + flt_s \ + flw \ + fmadd_s \ + fmax_s \ + fmin_s \ + fmsub_s \ + fmul_s \ + fmv_w_x \ + fmv_x_w \ + fnmadd_s \ + fnmsub_s \ + fsgnj_s \ + fsgnjn_s \ + fsgnjx_s \ + fsqrt_s \ + fsub_s \ + fsw \ + +riscv_insn_ext_d = \ + fadd_d \ + fclass_d \ fcvt_d_l \ 
fcvt_d_lu \ fcvt_d_q \ @@ -156,142 +237,748 @@ riscv_insn_list = \ fcvt_d_w \ fcvt_d_wu \ fcvt_l_d \ - fcvt_l_q \ - fcvt_l_s \ fcvt_lu_d \ + fcvt_s_d \ + fcvt_w_d \ + fcvt_wu_d \ + fdiv_d \ + feq_d \ + fld \ + fle_d \ + flt_d \ + fmadd_d \ + fmax_d \ + fmin_d \ + fmsub_d \ + fmul_d \ + fmv_d_x \ + fmv_x_d \ + fnmadd_d \ + fnmsub_d \ + fsd \ + fsgnj_d \ + fsgnjn_d \ + fsgnjx_d \ + fsqrt_d \ + fsub_d \ + +riscv_insn_ext_zfh = \ + fadd_h \ + fclass_h \ + fcvt_l_h \ + fcvt_lu_h \ + fcvt_d_h \ + fcvt_h_d \ + fcvt_h_l \ + fcvt_h_lu \ + #fcvt_h_q \ + fcvt_h_s \ + fcvt_h_w \ + fcvt_h_wu \ + #fcvt_q_h \ + fcvt_s_h \ + fcvt_w_h \ + fcvt_wu_h \ + fdiv_h \ + feq_h \ + fle_h \ + flh \ + flt_h \ + fmadd_h \ + fmax_h \ + fmin_h \ + fmsub_h \ + fmul_h \ + fmv_h_x \ + fmv_x_h \ + fnmadd_h \ + fnmsub_h \ + fsgnj_h \ + fsgnjn_h \ + fsgnjx_h \ + fsh \ + fsqrt_h \ + fsub_h \ + +riscv_insn_ext_q = \ + fadd_q \ + fclass_q \ + fcvt_l_q \ fcvt_lu_q \ - fcvt_lu_s \ fcvt_q_d \ fcvt_q_l \ fcvt_q_lu \ fcvt_q_s \ fcvt_q_w \ fcvt_q_wu \ - fcvt_s_d \ - fcvt_s_l \ - fcvt_s_lu \ fcvt_s_q \ - fcvt_s_w \ - fcvt_s_wu \ - fcvt_w_d \ fcvt_w_q \ - fcvt_w_s \ - fcvt_wu_d \ fcvt_wu_q \ - fcvt_wu_s \ - fdiv_d \ fdiv_q \ - fdiv_s \ - fence \ - fence_i \ - feq_d \ feq_q \ - feq_s \ - fld \ - fle_d \ fle_q \ - fle_s \ flq \ - flt_d \ flt_q \ - flt_s \ - flw \ - fmadd_d \ fmadd_q \ - fmadd_s \ - fmax_d \ fmax_q \ - fmax_s \ - fmin_d \ fmin_q \ - fmin_s \ - fmsub_d \ fmsub_q \ - fmsub_s \ - fmul_d \ fmul_q \ - fmul_s \ - fmv_d_x \ - fmv_w_x \ - fmv_x_d \ - fmv_x_w \ - fnmadd_d \ fnmadd_q \ - fnmadd_s \ - fnmsub_d \ fnmsub_q \ - fnmsub_s \ - fsd \ - fsgnj_d \ fsgnj_q \ - fsgnjn_d \ fsgnjn_q \ - fsgnjn_s \ - fsgnj_s \ - fsgnjx_d \ fsgnjx_q \ - fsgnjx_s \ fsq \ - fsqrt_d \ fsqrt_q \ - fsqrt_s \ - fsub_d \ fsub_q \ - fsub_s \ - fsw \ - jal \ - jalr \ - lb \ - lbu \ - ld \ - lh \ - lhu \ - lr_d \ - lr_w \ - lui \ - lw \ - lwu \ + +# Disabled riscv_insn_ext_v_alu_int instructions for opcode overlap: +#vasubu_vx 
+#vslide1up_vx +#vaaddu_vx +#vadc_vvm +#vadc_vxm +#vsbc_vvm +#vsbc_vxm +#vmulhu_vx +#vdivu_vx +#vmulhsu_vx + +riscv_insn_ext_v_alu_int = \ + vaadd_vv \ + vaaddu_vv \ + vaadd_vx \ + vadc_vim \ + vadd_vi \ + vadd_vv \ + vadd_vx \ + vand_vi \ + vand_vv \ + vand_vx \ + vasub_vv \ + vasubu_vv \ + vasub_vx \ + vcompress_vm \ + vdiv_vv \ + vdiv_vx \ + vdivu_vv \ + vdot_vv \ + vdotu_vv \ + vid_v \ + viota_m \ + vmacc_vv \ + vmacc_vx \ + vmadc_vim \ + vmadc_vvm \ + vmadc_vxm \ + vmadd_vv \ + vmadd_vx \ + vmand_mm \ + vmandnot_mm \ + vmax_vv \ + vmax_vx \ + vmaxu_vv \ + vmaxu_vx \ + vmerge_vim \ + vmerge_vvm \ + vmerge_vxm \ + vfirst_m \ + vmin_vv \ + vmin_vx \ + vminu_vv \ + vminu_vx \ + vmnand_mm \ + vmnor_mm \ + vmor_mm \ + vmornot_mm \ + vpopc_m \ + vmsbc_vvm \ + vmsbc_vxm \ + vmsbf_m \ + vmseq_vi \ + vmseq_vv \ + vmseq_vx \ + vmsgt_vi \ + vmsgt_vx \ + vmsgtu_vi \ + vmsgtu_vx \ + vmsif_m \ + vmsle_vi \ + vmsle_vv \ + vmsle_vx \ + vmsleu_vi \ + vmsleu_vv \ + vmsleu_vx \ + vmslt_vv \ + vmslt_vx \ + vmsltu_vv \ + vmsltu_vx \ + vmsne_vi \ + vmsne_vv \ + vmsne_vx \ + vmsof_m \ + vmul_vv \ + vmul_vx \ + vmulh_vv \ + vmulh_vx \ + vmulhsu_vv \ + vmulhu_vv \ + vmv_s_x \ + vmv_v_i \ + vmv_v_v \ + vmv_v_x \ + vmv_x_s \ + vmv1r_v \ + vmv2r_v \ + vmv4r_v \ + vmv8r_v \ + vmxnor_mm \ + vmxor_mm \ + vnclip_wi \ + vnclip_wv \ + vnclip_wx \ + vnclipu_wi \ + vnclipu_wv \ + vnclipu_wx \ + vnmsac_vv \ + vnmsac_vx \ + vnmsub_vv \ + vnmsub_vx \ + vnsra_wi \ + vnsra_wv \ + vnsra_wx \ + vnsrl_wi \ + vnsrl_wv \ + vnsrl_wx \ + vor_vi \ + vor_vv \ + vor_vx \ + vredand_vs \ + vredmax_vs \ + vredmaxu_vs \ + vredmin_vs \ + vredminu_vs \ + vredor_vs \ + vredsum_vs \ + vredxor_vs \ + vrem_vv \ + vrem_vx \ + vremu_vv \ + vremu_vx \ + vrgather_vi \ + vrgather_vv \ + vrgather_vx \ + vrgatherei16_vv \ + vrsub_vi \ + vrsub_vx \ + vsadd_vi \ + vsadd_vv \ + vsadd_vx \ + vsaddu_vi \ + vsaddu_vv \ + vsaddu_vx \ + vsext_vf2 \ + vsext_vf4 \ + vsext_vf8 \ + vslide1down_vx \ + vslidedown_vi \ + vslidedown_vx \ + 
vslideup_vi \ + vslideup_vx \ + vsll_vi \ + vsll_vv \ + vsll_vx \ + vsmul_vv \ + vsmul_vx \ + vsra_vi \ + vsra_vv \ + vsra_vx \ + vsrl_vi \ + vsrl_vv \ + vsrl_vx \ + vssra_vi \ + vssra_vv \ + vssra_vx \ + vssrl_vi \ + vssrl_vv \ + vssrl_vx \ + vssub_vv \ + vssub_vx \ + vssubu_vv \ + vssubu_vx \ + vsub_vv \ + vsub_vx \ + vwadd_vv \ + vwadd_vx \ + vwadd_wv \ + vwadd_wx \ + vwaddu_vv \ + vwaddu_vx \ + vwaddu_wv \ + vwaddu_wx \ + vwmacc_vv \ + vwmacc_vx \ + vwmaccsu_vv \ + vwmaccsu_vx \ + vwmaccu_vv \ + vwmaccu_vx \ + vwmaccus_vx \ + vwmul_vv \ + vwmul_vx \ + vwmulsu_vv \ + vwmulsu_vx \ + vwmulu_vv \ + vwmulu_vx \ + vwredsum_vs \ + vwredsumu_vs \ + vwsub_vv \ + vwsub_vx \ + vwsub_wv \ + vwsub_wx \ + vwsubu_vv \ + vwsubu_vx \ + vwsubu_wv \ + vwsubu_wx \ + vxor_vi \ + vxor_vv \ + vxor_vx \ + vzext_vf2 \ + vzext_vf4 \ + vzext_vf8 \ + +# Disabled riscv_insn_ext_v_alu_fp instructions for opcode overlap: +#vfcvt_x_f_v + +riscv_insn_ext_v_alu_fp = \ + vfadd_vf \ + vfadd_vv \ + vfclass_v \ + vfcvt_f_x_v \ + vfcvt_f_xu_v \ + vfcvt_rtz_x_f_v \ + vfcvt_rtz_xu_f_v \ + vfcvt_xu_f_v \ + vfdiv_vf \ + vfdiv_vv \ + vfdot_vv \ + vfmacc_vf \ + vfmacc_vv \ + vfmadd_vf \ + vfmadd_vv \ + vfmax_vf \ + vfmax_vv \ + vfmerge_vfm \ + vfmin_vf \ + vfmin_vv \ + vfmsac_vf \ + vfmsac_vv \ + vfmsub_vf \ + vfmsub_vv \ + vfmul_vf \ + vfmul_vv \ + vfmv_f_s \ + vfmv_s_f \ + vfmv_v_f \ + vfncvt_f_f_w \ + vfncvt_f_x_w \ + vfncvt_f_xu_w \ + vfncvt_rod_f_f_w \ + vfncvt_rtz_x_f_w \ + vfncvt_rtz_xu_f_w \ + vfncvt_x_f_w \ + vfncvt_xu_f_w \ + vfnmacc_vf \ + vfnmacc_vv \ + vfnmadd_vf \ + vfnmadd_vv \ + vfnmsac_vf \ + vfnmsac_vv \ + vfnmsub_vf \ + vfnmsub_vv \ + vfrdiv_vf \ + vfredmax_vs \ + vfredmin_vs \ + vfredosum_vs \ + vfredsum_vs \ + vfrece7_v \ + vfrsub_vf \ + vfrsqrte7_v \ + vfsgnj_vf \ + vfsgnj_vv \ + vfsgnjn_vf \ + vfsgnjn_vv \ + vfsgnjx_vf \ + vfsgnjx_vv \ + vfsqrt_v \ + vfslide1down_vf \ + vfslide1up_vf \ + vfsub_vf \ + vfsub_vv \ + vfwadd_vf \ + vfwadd_vv \ + vfwadd_wf \ + vfwadd_wv \ + vfwcvt_f_f_v \ 
+ vfwcvt_f_x_v \ + vfwcvt_f_xu_v \ + vfwcvt_rtz_x_f_v \ + vfwcvt_rtz_xu_f_v \ + vfwcvt_x_f_v \ + vfwcvt_xu_f_v \ + vfwmacc_vf \ + vfwmacc_vv \ + vfwmsac_vf \ + vfwmsac_vv \ + vfwmul_vf \ + vfwmul_vv \ + vfwnmacc_vf \ + vfwnmacc_vv \ + vfwnmsac_vf \ + vfwnmsac_vv \ + vfwredosum_vs \ + vfwredsum_vs \ + vfwsub_vf \ + vfwsub_vv \ + vfwsub_wf \ + vfwsub_wv \ + vmfeq_vf \ + vmfeq_vv \ + vmfge_vf \ + vmfgt_vf \ + vmfle_vf \ + vmfle_vv \ + vmflt_vf \ + vmflt_vv \ + vmfne_vf \ + vmfne_vv \ + +riscv_insn_ext_v_amo = \ + vamoswapei8_v \ + vamoaddei8_v \ + vamoandei8_v \ + vamomaxei8_v \ + vamomaxuei8_v \ + vamominei8_v \ + vamominuei8_v \ + vamoorei8_v \ + vamoxorei8_v \ + vamoswapei16_v \ + vamoaddei16_v \ + vamoandei16_v \ + vamomaxei16_v \ + vamomaxuei16_v \ + vamominei16_v \ + vamominuei16_v \ + vamoorei16_v \ + vamoxorei16_v \ + vamoswapei32_v \ + vamoaddei32_v \ + vamoandei32_v \ + vamomaxei32_v \ + vamomaxuei32_v \ + vamominei32_v \ + vamominuei32_v \ + vamoorei32_v \ + vamoxorei32_v \ + vamoswapei64_v \ + vamoaddei64_v \ + vamoandei64_v \ + vamomaxei64_v \ + vamomaxuei64_v \ + vamominei64_v \ + vamominuei64_v \ + vamoorei64_v \ + vamoxorei64_v \ + +riscv_insn_ext_v_ldst = \ + vle8_v \ + vle16_v \ + vle32_v \ + vle64_v \ + vlse8_v \ + vlse16_v \ + vlse32_v \ + vlse64_v \ + vlxei8_v \ + vlxei16_v \ + vlxei32_v \ + vlxei64_v \ + vle8ff_v \ + vle16ff_v \ + vle32ff_v \ + vle64ff_v \ + vl1re8_v \ + vl2re8_v \ + vl4re8_v \ + vl8re8_v \ + vl1re16_v \ + vl2re16_v \ + vl4re16_v \ + vl8re16_v \ + vl1re32_v \ + vl2re32_v \ + vl4re32_v \ + vl8re32_v \ + vl1re64_v \ + vl2re64_v \ + vl4re64_v \ + vl8re64_v \ + vse8_v \ + vse16_v \ + vse32_v \ + vse64_v \ + vsse8_v \ + vsse16_v \ + vsse32_v \ + vsse64_v \ + vsxei8_v \ + vsxei16_v \ + vsxei32_v \ + vsxei64_v \ + vsuxei8_v \ + vsuxei16_v \ + vsuxei32_v \ + vsuxei64_v \ + vs1r_v \ + vs2r_v \ + vs4r_v \ + vs8r_v \ + +# Disabled riscv_insn_ext_v_ctrl instructions for opcode overlap: +#vsetvl + +riscv_insn_ext_v_ctrl = \ + vsetvli \ + 
+riscv_insn_ext_v = \ + $(riscv_insn_ext_v_alu_fp) \ + $(riscv_insn_ext_v_alu_int) \ + $(riscv_insn_ext_v_amo) \ + $(riscv_insn_ext_v_ctrl) \ + $(riscv_insn_ext_v_ldst) \ + +riscv_insn_ext_xpulpimg = \ + p_lb_irpost \ + p_lbu_irpost \ + p_lh_irpost \ + p_lhu_irpost \ + p_lw_irpost \ + p_lb_rrpost \ + p_lbu_rrpost \ + p_lh_rrpost \ + p_lhu_rrpost \ + p_lw_rrpost \ + p_lb_rr \ + p_lbu_rr \ + p_lh_rr \ + p_lhu_rr \ + p_lw_rr \ + p_sb_irpost \ + p_sh_irpost \ + p_sw_irpost \ + p_sb_rrpost \ + p_sh_rrpost \ + p_sw_rrpost \ + p_sb_rr \ + p_sh_rr \ + p_sw_rr \ + p_abs \ + p_slet \ + p_sletu \ + p_max \ + p_maxu \ + p_min \ + p_minu \ + p_exths \ + p_exthz \ + p_extbs \ + p_extbz \ + p_clip \ + p_clipu \ + p_clipr \ + p_clipur \ + p_beqimm \ + p_bneimm \ + p_mac \ + p_msu \ + pv_add_h \ + pv_add_sc_h \ + pv_add_sci_h \ + pv_add_b \ + pv_add_sc_b \ + pv_add_sci_b \ + pv_sub_h \ + pv_sub_sc_h \ + pv_sub_sci_h \ + pv_sub_b \ + pv_sub_sc_b \ + pv_sub_sci_b \ + pv_avg_h \ + pv_avg_sc_h \ + pv_avg_sci_h \ + pv_avg_b \ + pv_avg_sc_b \ + pv_avg_sci_b \ + pv_avgu_h \ + pv_avgu_sc_h \ + pv_avgu_sci_h \ + pv_avgu_b \ + pv_avgu_sc_b \ + pv_avgu_sci_b \ + pv_min_h \ + pv_min_sc_h \ + pv_min_sci_h \ + pv_min_b \ + pv_min_sc_b \ + pv_min_sci_b \ + pv_minu_h \ + pv_minu_sc_h \ + pv_minu_sci_h \ + pv_minu_b \ + pv_minu_sc_b \ + pv_minu_sci_b \ + pv_max_h \ + pv_max_sc_h \ + pv_max_sci_h \ + pv_max_b \ + pv_max_sc_b \ + pv_max_sci_b \ + pv_maxu_h \ + pv_maxu_sc_h \ + pv_maxu_sci_h \ + pv_maxu_b \ + pv_maxu_sc_b \ + pv_maxu_sci_b \ + pv_srl_h \ + pv_srl_sc_h \ + pv_srl_sci_h \ + pv_srl_b \ + pv_srl_sc_b \ + pv_srl_sci_b \ + pv_sra_h \ + pv_sra_sc_h \ + pv_sra_sci_h \ + pv_sra_b \ + pv_sra_sc_b \ + pv_sra_sci_b \ + pv_sll_h \ + pv_sll_sc_h \ + pv_sll_sci_h \ + pv_sll_b \ + pv_sll_sc_b \ + pv_sll_sci_b \ + pv_or_h \ + pv_or_sc_h \ + pv_or_sci_h \ + pv_or_b \ + pv_or_sc_b \ + pv_or_sci_b \ + pv_xor_h \ + pv_xor_sc_h \ + pv_xor_sci_h \ + pv_xor_b \ + pv_xor_sc_b \ + pv_xor_sci_b \ + pv_and_h \ + 
pv_and_sc_h \ + pv_and_sci_h \ + pv_and_b \ + pv_and_sc_b \ + pv_and_sci_b \ + pv_abs_h \ + pv_abs_b \ + pv_extract_h \ + pv_extract_b \ + pv_extractu_h \ + pv_extractu_b \ + pv_insert_h \ + pv_insert_b \ + pv_dotup_h \ + pv_dotup_sc_h \ + pv_dotup_sci_h \ + pv_dotup_b \ + pv_dotup_sc_b \ + pv_dotup_sci_b \ + pv_dotusp_h \ + pv_dotusp_sc_h \ + pv_dotusp_sci_h \ + pv_dotusp_b \ + pv_dotusp_sc_b \ + pv_dotusp_sci_b \ + pv_dotsp_h \ + pv_dotsp_sc_h \ + pv_dotsp_sci_h \ + pv_dotsp_b \ + pv_dotsp_sc_b \ + pv_dotsp_sci_b \ + pv_sdotup_h \ + pv_sdotup_sc_h \ + pv_sdotup_sci_h \ + pv_sdotup_b \ + pv_sdotup_sc_b \ + pv_sdotup_sci_b \ + pv_sdotusp_h \ + pv_sdotusp_sc_h \ + pv_sdotusp_sci_h \ + pv_sdotusp_b \ + pv_sdotusp_sc_b \ + pv_sdotusp_sci_b \ + pv_sdotsp_h \ + pv_sdotsp_sc_h \ + pv_sdotsp_sci_h \ + pv_sdotsp_b \ + pv_sdotsp_sc_b \ + pv_sdotsp_sci_b \ + pv_shuffle2_h \ + pv_shuffle2_b \ + +riscv_insn_ext_h = \ + hfence_gvma \ + hfence_vvma \ + hlv_b \ + hlv_bu \ + hlv_h \ + hlv_hu \ + hlvx_hu \ + hlv_w \ + hlv_wu \ + hlvx_wu \ + hlv_d \ + hsv_b \ + hsv_h \ + hsv_w \ + hsv_d \ + +riscv_insn_priv = \ + csrrc \ + csrrci \ + csrrs \ + csrrsi \ + csrrw \ + csrrwi \ + dret \ + ebreak \ + ecall \ mret \ - mul \ - mulh \ - mulhsu \ - mulhu \ - mulw \ - or \ - ori \ - rem \ - remu \ - remuw \ - remw \ - sb \ - sc_d \ - sc_w \ - sd \ sfence_vma \ - sh \ - sll \ - slli \ - slliw \ - sllw \ - slt \ - slti \ - sltiu \ - sltu \ - sra \ - srai \ - sraiw \ - sraw \ sret \ - srl \ - srli \ - srliw \ - srlw \ - sub \ - subw \ - sw \ wfi \ - xor \ - xori \ + + +riscv_insn_list = \ + $(riscv_insn_ext_a) \ + $(riscv_insn_ext_c) \ + $(riscv_insn_ext_i) \ + $(riscv_insn_ext_m) \ + $(riscv_insn_ext_f) \ + $(riscv_insn_ext_d) \ + $(riscv_insn_ext_zfh) \ + $(riscv_insn_ext_q) \ + $(riscv_insn_ext_xpulpimg) \ + $(riscv_insn_ext_h) \ + $(riscv_insn_priv) \ + # $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ riscv_gen_srcs = \ $(addsuffix .cc,$(riscv_insn_list)) diff --git a/riscv/sim.cc b/riscv/sim.cc 
index 6725e93536..76bb3cdff0 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -1,10 +1,11 @@ // See LICENSE for license details. #include "sim.h" -#include "ust_tracer.h" #include "mmu.h" #include "dts.h" #include "remote_bitbang.h" +#include "byteorder.h" +#include #include #include #include @@ -25,47 +26,83 @@ static void handle_signal(int sig) signal(sig, &handle_signal); } -sim_t::sim_t(const char* isa, size_t nprocs, bool halted, reg_t start_pc, - std::vector> mems, +sim_t::sim_t(const char* isa, const char* priv, const char* varch, + size_t nprocs, bool halted, bool real_time_clint, + reg_t initrd_start, reg_t initrd_end, const char* bootargs, + reg_t start_pc, std::vector> mems, + std::vector> plugin_devices, const std::vector& args, - std::vector const hartids, unsigned progsize, - unsigned max_bus_master_bits, bool require_authentication) - : htif_t(args), mems(mems), procs(std::max(nprocs, size_t(1))), - start_pc(start_pc), current_step(0), current_proc(0), debug(false), trace(false), - histogram_enabled(false), dtb_enabled(true), remote_bitbang(NULL), - debug_module(this, progsize, max_bus_master_bits, require_authentication) + std::vector const hartids, + const debug_module_config_t &dm_config, + const char *log_path, + bool dtb_enabled, const char *dtb_file) + : htif_t(args), + mems(mems), + plugin_devices(plugin_devices), + procs(std::max(nprocs, size_t(1))), + initrd_start(initrd_start), + initrd_end(initrd_end), + bootargs(bootargs), + start_pc(start_pc), + dtb_file(dtb_file ? 
dtb_file : ""), + dtb_enabled(dtb_enabled), + log_file(log_path), + current_step(0), + current_proc(0), + debug(false), + histogram_enabled(false), + log(false), + remote_bitbang(NULL), + debug_module(this, dm_config) { signal(SIGINT, &handle_signal); for (auto& x : mems) bus.add_device(x.first, x.second); + for (auto& x : plugin_devices) + bus.add_device(x.first, x.second); + debug_module.add_device(&bus); debug_mmu = new mmu_t(this, NULL); - if (hartids.size() == 0) { - for (size_t i = 0; i < procs.size(); i++) { - procs[i] = new processor_t(isa, this, i, halted); - } - } - else { - if (hartids.size() != procs.size()) { - std::cerr << "Number of specified hartids doesn't match number of processors" << strerror(errno) << std::endl; + if (! (hartids.empty() || hartids.size() == nprocs)) { + std::cerr << "Number of specified hartids (" + << hartids.size() + << ") doesn't match number of processors (" + << nprocs << ").\n"; exit(1); - } - for (size_t i = 0; i < procs.size(); i++) { - procs[i] = new processor_t(isa, this, hartids[i], halted); - } } - clint.reset(new clint_t(procs)); - bus.add_device(CLINT_BASE, clint.get()); + for (size_t i = 0; i < nprocs; i++) { + int hart_id = hartids.empty() ? 
i : hartids[i]; + procs[i] = new processor_t(isa, priv, varch, this, hart_id, halted, + log_file.get()); + } + + make_dtb(); + + clint.reset(new clint_t(procs, CPU_HZ / INSNS_PER_RTC_TICK, real_time_clint)); + reg_t clint_base; + if (fdt_parse_clint((void *)dtb.c_str(), &clint_base, "riscv,clint0")) { + bus.add_device(CLINT_BASE, clint.get()); + } else { + bus.add_device(clint_base, clint.get()); + } + + for (size_t i = 0; i < nprocs; i++) { + reg_t pmp_num = 0, pmp_granularity = 0; + fdt_parse_pmp_num((void *)dtb.c_str(), &pmp_num, "riscv"); + fdt_parse_pmp_alignment((void *)dtb.c_str(), &pmp_granularity, "riscv"); + + procs[i]->set_pmp_num(pmp_num); + procs[i]->set_pmp_granularity(pmp_granularity); + } } sim_t::~sim_t() { - ust_close(); for (size_t i = 0; i < procs.size(); i++) delete procs[i]; delete debug_mmu; @@ -81,9 +118,6 @@ void sim_t::main() if (!debug && log) set_procs_debug(true); - if (!debug && trace) - set_procs_trace(true); - while (!done()) { if (debug || ctrlc_pressed) @@ -130,17 +164,6 @@ void sim_t::set_debug(bool value) debug = value; } -void sim_t::set_log(bool value) -{ - log = value; -} - -void sim_t::set_ust_trace(const char * const ust_file) -{ - ust_open(ust_file); - trace = true; -} - void sim_t::set_histogram(bool value) { histogram_enabled = value; @@ -149,33 +172,71 @@ void sim_t::set_histogram(bool value) } } +void sim_t::configure_log(bool enable_log, bool enable_commitlog) +{ + log = enable_log; + + if (!enable_commitlog) + return; + +#ifndef RISCV_ENABLE_COMMITLOG + fputs("Commit logging support has not been properly enabled; " + "please re-build the riscv-isa-sim project using " + "\"configure --enable-commitlog\".\n", + stderr); + abort(); +#else + for (processor_t *proc : procs) { + proc->enable_log_commits(); + } +#endif +} + void sim_t::set_procs_debug(bool value) { for (size_t i=0; i< procs.size(); i++) procs[i]->set_debug(value); } -void sim_t::set_procs_trace(bool value) +static bool paddr_ok(reg_t addr) { - for (size_t 
i=0; i< procs.size(); i++) - procs[i]->set_trace(value); + return (addr >> MAX_PADDR_BITS) == 0; } bool sim_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { - if (addr + len < addr) + if (addr + len < addr || !paddr_ok(addr + len - 1)) return false; return bus.load(addr, len, bytes); } bool sim_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) { - if (addr + len < addr) + if (addr + len < addr || !paddr_ok(addr + len - 1)) return false; return bus.store(addr, len, bytes); } void sim_t::make_dtb() +{ + if (!dtb_file.empty()) { + std::ifstream fin(dtb_file.c_str(), std::ios::binary); + if (!fin.good()) { + std::cerr << "can't find dtb file: " << dtb_file << std::endl; + exit(-1); + } + + std::stringstream strstream; + strstream << fin.rdbuf(); + + dtb = strstream.str(); + } else { + dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_start, initrd_end, bootargs, procs, mems); + dtb = dts_compile(dts); + } +} + +void sim_t::set_rom() { const int reset_vec_size = 8; @@ -193,11 +254,27 @@ void sim_t::make_dtb() (uint32_t) (start_pc & 0xffffffff), (uint32_t) (start_pc >> 32) }; + for(int i = 0; i < reset_vec_size; i++) + reset_vec[i] = to_le(reset_vec[i]); std::vector rom((char*)reset_vec, (char*)reset_vec + sizeof(reset_vec)); - dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, procs, mems); - std::string dtb = dts_compile(dts); + std::string dtb; + if (!dtb_file.empty()) { + std::ifstream fin(dtb_file.c_str(), std::ios::binary); + if (!fin.good()) { + std::cerr << "can't find dtb file: " << dtb_file << std::endl; + exit(-1); + } + + std::stringstream strstream; + strstream << fin.rdbuf(); + + dtb = strstream.str(); + } else { + dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_start, initrd_end, bootargs, procs, mems); + dtb = dts_compile(dts); + } rom.insert(rom.end(), dtb.begin(), dtb.end()); const int align = 0x1000; @@ -208,6 +285,8 @@ void sim_t::make_dtb() } char* sim_t::addr_to_mem(reg_t addr) { + if (!paddr_ok(addr)) + return NULL; auto desc = 
bus.find_device(addr); if (auto mem = dynamic_cast(desc.second)) if (addr - desc.first < mem->size()) @@ -215,12 +294,17 @@ char* sim_t::addr_to_mem(reg_t addr) { return NULL; } +const char* sim_t::get_symbol(uint64_t addr) +{ + return htif_t::get_symbol(addr); +} + // htif void sim_t::reset() { if (dtb_enabled) - make_dtb(); + set_rom(); } void sim_t::idle() @@ -231,7 +315,7 @@ void sim_t::idle() void sim_t::read_chunk(addr_t taddr, size_t len, void* dst) { assert(len == 8); - auto data = debug_mmu->load_uint64(taddr); + auto data = to_le(debug_mmu->load_uint64(taddr)); memcpy(dst, &data, sizeof data); } @@ -240,7 +324,7 @@ void sim_t::write_chunk(addr_t taddr, size_t len, const void* src) assert(len == 8); uint64_t data; memcpy(&data, src, sizeof data); - debug_mmu->store_uint64(taddr, data); + debug_mmu->store_uint64(taddr, from_le(data)); } void sim_t::proc_reset(unsigned id) diff --git a/riscv/sim.h b/riscv/sim.h index 90fbc97b8d..c7e3de4f71 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -3,15 +3,18 @@ #ifndef _RISCV_SIM_H #define _RISCV_SIM_H -#include "processor.h" -#include "devices.h" #include "debug_module.h" +#include "devices.h" +#include "log_file.h" +#include "processor.h" #include "simif.h" + #include #include #include #include #include +#include class mmu_t; class remote_bitbang_t; @@ -20,23 +23,30 @@ class remote_bitbang_t; class sim_t : public htif_t, public simif_t { public: - sim_t(const char* isa, size_t _nprocs, bool halted, reg_t start_pc, - std::vector> mems, + sim_t(const char* isa, const char* priv, const char* varch, size_t _nprocs, + bool halted, bool real_time_clint, + reg_t initrd_start, reg_t initrd_end, const char* bootargs, + reg_t start_pc, std::vector> mems, + std::vector> plugin_devices, const std::vector& args, const std::vector hartids, - unsigned progsize, unsigned max_bus_master_bits, bool require_authentication); + const debug_module_config_t &dm_config, const char *log_path, + bool dtb_enabled, const char *dtb_file); 
~sim_t(); // run the simulation to completion int run(); void set_debug(bool value); - void set_log(bool value); - void set_ust_trace(const char * const ust_file); void set_histogram(bool value); + + // Configure logging + // + // If enable_log is true, an instruction trace will be generated. If + // enable_commitlog is true, so will the commit results (if this + // build was configured without support for commit logging, the + // function will print an error message and abort). + void configure_log(bool enable_log, bool enable_commitlog); + void set_procs_debug(bool value); - void set_procs_trace(bool value); - void set_dtb_enabled(bool value) { - this->dtb_enabled = value; - } void set_remote_bitbang(remote_bitbang_t* remote_bitbang) { this->remote_bitbang = remote_bitbang; } @@ -49,13 +59,21 @@ class sim_t : public htif_t, public simif_t private: std::vector> mems; + std::vector> plugin_devices; mmu_t* debug_mmu; // debug port into main memory std::vector procs; + reg_t initrd_start; + reg_t initrd_end; + const char* bootargs; reg_t start_pc; std::string dts; + std::string dtb; + std::string dtb_file; + bool dtb_enabled; std::unique_ptr boot_rom; std::unique_ptr clint; bus_t bus; + log_file_t log_file; processor_t* get_core(const std::string& i); void step(size_t n); // step through simulation @@ -65,10 +83,8 @@ class sim_t : public htif_t, public simif_t size_t current_step; size_t current_proc; bool debug; - bool log; - bool trace; bool histogram_enabled; // provide a histogram of PCs - bool dtb_enabled; + bool log; remote_bitbang_t* remote_bitbang; // memory-mapped I/O routines @@ -76,6 +92,9 @@ class sim_t : public htif_t, public simif_t bool mmio_load(reg_t addr, size_t len, uint8_t* bytes); bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes); void make_dtb(); + void set_rom(); + + const char* get_symbol(uint64_t addr); // presents a prompt for introspection into the simulation void interactive(); @@ -86,8 +105,10 @@ class sim_t : public htif_t, 
public simif_t void interactive_run(const std::string& cmd, const std::vector& args, bool noisy); void interactive_run_noisy(const std::string& cmd, const std::vector& args); void interactive_run_silent(const std::string& cmd, const std::vector& args); + void interactive_vreg(const std::string& cmd, const std::vector& args); void interactive_reg(const std::string& cmd, const std::vector& args); void interactive_freg(const std::string& cmd, const std::vector& args); + void interactive_fregh(const std::string& cmd, const std::vector& args); void interactive_fregs(const std::string& cmd, const std::vector& args); void interactive_fregd(const std::string& cmd, const std::vector& args); void interactive_pc(const std::string& cmd, const std::vector& args); diff --git a/riscv/simif.h b/riscv/simif.h index 1d982b3396..0e75d45b16 100644 --- a/riscv/simif.h +++ b/riscv/simif.h @@ -16,6 +16,9 @@ class simif_t virtual bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes) = 0; // Callback for processors to let the simulation know they were reset. 
virtual void proc_reset(unsigned id) = 0; + + virtual const char* get_symbol(uint64_t addr) = 0; + }; #endif diff --git a/riscv/trap.h b/riscv/trap.h index b5b8a5080a..4431d8a94e 100644 --- a/riscv/trap.h +++ b/riscv/trap.h @@ -13,18 +13,23 @@ class trap_t public: trap_t(reg_t which) : which(which) {} virtual const char* name(); + virtual bool has_gva() { return false; } virtual bool has_tval() { return false; } virtual reg_t get_tval() { return 0; } + virtual bool has_tval2() { return false; } + virtual reg_t get_tval2() { return 0; } + virtual bool has_tinst() { return false; } + virtual reg_t get_tinst() { return 0; } reg_t cause() { return which; } private: char _name[16]; reg_t which; }; -class mem_trap_t : public trap_t +class insn_trap_t : public trap_t { public: - mem_trap_t(reg_t which, reg_t tval) + insn_trap_t(reg_t which, reg_t tval) : trap_t(which), tval(tval) {} bool has_tval() override { return true; } reg_t get_tval() override { return tval; } @@ -32,32 +37,59 @@ class mem_trap_t : public trap_t reg_t tval; }; +class mem_trap_t : public trap_t +{ + public: + mem_trap_t(reg_t which, bool gva, reg_t tval, reg_t tval2, reg_t tinst) + : trap_t(which), gva(gva), tval(tval), tval2(tval2), tinst(tinst) {} + bool has_gva() override { return gva; } + bool has_tval() override { return true; } + reg_t get_tval() override { return tval; } + bool has_tval2() override { return true; } + reg_t get_tval2() override { return tval2; } + bool has_tinst() override { return true; } + reg_t get_tinst() override { return tinst; } + private: + bool gva; + reg_t tval, tval2, tinst; +}; + #define DECLARE_TRAP(n, x) class trap_##x : public trap_t { \ public: \ trap_##x() : trap_t(n) {} \ const char* name() { return "trap_"#x; } \ }; +#define DECLARE_INST_TRAP(n, x) class trap_##x : public insn_trap_t { \ + public: \ + trap_##x(reg_t tval) : insn_trap_t(n, tval) {} \ + const char* name() { return "trap_"#x; } \ +}; + #define DECLARE_MEM_TRAP(n, x) class trap_##x : public 
mem_trap_t { \ public: \ - trap_##x(reg_t tval) : mem_trap_t(n, tval) {} \ + trap_##x(reg_t tval, reg_t tval2, reg_t tinst) : mem_trap_t(n, true, tval, tval2, tinst) {} \ const char* name() { return "trap_"#x; } \ }; DECLARE_MEM_TRAP(CAUSE_MISALIGNED_FETCH, instruction_address_misaligned) DECLARE_MEM_TRAP(CAUSE_FETCH_ACCESS, instruction_access_fault) -DECLARE_MEM_TRAP(CAUSE_ILLEGAL_INSTRUCTION, illegal_instruction) -DECLARE_MEM_TRAP(CAUSE_BREAKPOINT, breakpoint) +DECLARE_INST_TRAP(CAUSE_ILLEGAL_INSTRUCTION, illegal_instruction) +DECLARE_INST_TRAP(CAUSE_BREAKPOINT, breakpoint) DECLARE_MEM_TRAP(CAUSE_MISALIGNED_LOAD, load_address_misaligned) DECLARE_MEM_TRAP(CAUSE_MISALIGNED_STORE, store_address_misaligned) DECLARE_MEM_TRAP(CAUSE_LOAD_ACCESS, load_access_fault) DECLARE_MEM_TRAP(CAUSE_STORE_ACCESS, store_access_fault) DECLARE_TRAP(CAUSE_USER_ECALL, user_ecall) DECLARE_TRAP(CAUSE_SUPERVISOR_ECALL, supervisor_ecall) -DECLARE_TRAP(CAUSE_HYPERVISOR_ECALL, hypervisor_ecall) +DECLARE_TRAP(CAUSE_VIRTUAL_SUPERVISOR_ECALL, virtual_supervisor_ecall) DECLARE_TRAP(CAUSE_MACHINE_ECALL, machine_ecall) DECLARE_MEM_TRAP(CAUSE_FETCH_PAGE_FAULT, instruction_page_fault) DECLARE_MEM_TRAP(CAUSE_LOAD_PAGE_FAULT, load_page_fault) DECLARE_MEM_TRAP(CAUSE_STORE_PAGE_FAULT, store_page_fault) +DECLARE_MEM_TRAP(CAUSE_FETCH_GUEST_PAGE_FAULT, instruction_guest_page_fault) +DECLARE_MEM_TRAP(CAUSE_LOAD_GUEST_PAGE_FAULT, load_guest_page_fault) +DECLARE_INST_TRAP(CAUSE_VIRTUAL_INSTRUCTION, virtual_instruction) +DECLARE_MEM_TRAP(CAUSE_STORE_GUEST_PAGE_FAULT, store_guest_page_fault) #endif diff --git a/riscv/ust_tracer.cc b/riscv/ust_tracer.cc deleted file mode 100644 index fb68d02dd6..0000000000 --- a/riscv/ust_tracer.cc +++ /dev/null @@ -1,74 +0,0 @@ -// See LICENSE for license details. 
- -#include "ust_tracer.h" -#include "trap.h" - -#include - -static reg_t s_addr = 0; -static uint64_t s_insn = 0; -static uint8_t s_prv = 0; -static uint8_t s_ex = 0; -static reg_t s_ex_cause = 0; -static reg_t s_tval = 0; -static uint8_t s_interrupt = 0; -static FILE* s_trace = NULL; -static bool has_output_header = false; - -void ust_open(const char *filename) -{ - if(!filename) - return; - s_trace = fopen(filename, "w"); - if (!s_trace) - fprintf(stderr, "Failed to open ust trace file %s: %s", - filename, strerror(errno)); -} - -void ust_step(void) -{ - if (!s_trace) - return; - - if (has_output_header) { - fprintf(s_trace, "1,%lx,%lx,%x,%x,%lx,%lx,%x\n", - s_addr, s_insn, s_prv, s_ex, s_ex_cause, s_tval, s_interrupt); - } else { - fprintf(s_trace, "VALID,ADDRESS,INSN,PRIVILEGE,EXCEPTION,ECAUSE,TVAL,INTERRUPT\n"); - has_output_header = true; - } - - s_ex = 0; -} - -void ust_close(void) -{ - if (s_trace) - fclose(s_trace); - s_trace = NULL; -} - -void ust_set_addr(reg_t addr) { - s_addr = addr; -} - -void ust_set_insn(uint64_t insn) { - s_insn = insn; -} - -void ust_set_priv(uint8_t prv) { - s_prv = prv; -} - -void ust_set_exception(reg_t cause) { - s_ex = 1; - s_ex_cause = cause; -} - -void ust_set_tval(reg_t tval) { - s_tval = tval; -} - -void ust_set_interrupt(uint8_t interrupt) { - s_interrupt = interrupt; -} diff --git a/riscv/ust_tracer.h b/riscv/ust_tracer.h deleted file mode 100644 index 7eb6fc479a..0000000000 --- a/riscv/ust_tracer.h +++ /dev/null @@ -1,19 +0,0 @@ -// See LICENSE for license details. 
- -#ifndef _RISCV_UST_TRACER_H -#define _RISCV_UST_TRACER_H - -#include "processor.h" - -void ust_open(const char *filename); -void ust_start(void); -void ust_step(void); -void ust_close(void); -void ust_set_addr(reg_t addr); -void ust_set_insn(uint64_t insn); -void ust_set_priv(uint8_t prv); -void ust_set_exception(reg_t cause); -void ust_set_tval(reg_t tval); -void ust_set_interrupt(uint8_t interrupt); - -#endif diff --git a/scripts/vcs-version.sh b/scripts/vcs-version.sh index 31fae86951..692c071ef7 100755 --- a/scripts/vcs-version.sh +++ b/scripts/vcs-version.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash #========================================================================= # vcs-version.sh [options] [src-dir] #========================================================================= diff --git a/softfloat/f16_classify.c b/softfloat/f16_classify.c new file mode 100755 index 0000000000..9402ff13e8 --- /dev/null +++ b/softfloat/f16_classify.c @@ -0,0 +1,36 @@ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + diff --git a/softfloat/f16_to_i16.c 
b/softfloat/f16_to_i16.c new file mode 100644 index 0000000000..b0fbb7cc75 --- /dev/null +++ b/softfloat/f16_to_i16.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_i8.c b/softfloat/f16_to_i8.c new file mode 100644 index 0000000000..23638cc102 --- /dev/null +++ b/softfloat/f16_to_i8.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast8_t f16_to_i8( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromPosOverflow; + } else if (sig32 < INT8_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui16.c b/softfloat/f16_to_ui16.c new file mode 100644 index 0000000000..81c4f8d9e0 --- /dev/null +++ b/softfloat/f16_to_ui16.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui8.c b/softfloat/f16_to_ui8.c new file mode 100644 index 0000000000..96124e1275 --- /dev/null +++ b/softfloat/f16_to_ui8.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast8_t f16_to_ui8( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui8_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_i16.c b/softfloat/f32_to_i16.c new file mode 100644 index 0000000000..bde4c76c9a --- /dev/null +++ b/softfloat/f32_to_i16.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_ui16.c b/softfloat/f32_to_ui16.c new file mode 100644 index 0000000000..073492bfaa --- /dev/null +++ b/softfloat/f32_to_ui16.c @@ -0,0 +1,53 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} diff --git a/softfloat/fall_maxmin.c b/softfloat/fall_maxmin.c new file mode 100644 index 0000000000..32a9ade59e --- /dev/null +++ b/softfloat/fall_maxmin.c @@ -0,0 +1,81 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +#define COMPARE_MAX(a, b, bits) \ +float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool greater = f ## bits ## _lt_quiet(b, a) || \ + (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } else { \ + return greater || isNaNF ## bits ## UI((b).v) ? 
a : b; \ + } \ +} + +#define COMPARE_MIN(a, b, bits) \ +float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool less = f ## bits ## _lt_quiet(a, b) || \ + (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } else { \ + return less || isNaNF ## bits ## UI((b).v) ? a : b; \ + } \ +} + +COMPARE_MAX(a, b, 16); +COMPARE_MAX(a, b, 32); +COMPARE_MAX(a, b, 64); + +COMPARE_MIN(a, b, 16); +COMPARE_MIN(a, b, 32); +COMPARE_MIN(a, b, 64); diff --git a/softfloat/fall_reciprocal.c b/softfloat/fall_reciprocal.c new file mode 100644 index 0000000000..1c96458935 --- /dev/null +++ b/softfloat/fall_reciprocal.c @@ -0,0 +1,392 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +static inline uint64_t extract64(uint64_t val, int pos, int len) +{ + assert(pos >= 0 && len > 0 && len <= 64 - pos); + return (val >> pos) & (~UINT64_C(0) >> (64 - len)); +} + +static inline uint64_t make_mask64(int pos, int len) +{ + assert(pos >= 0 && len > 0 && pos < 64 && len <= 64); + return (UINT64_MAX >> (64 - len)) << pos; +} + +//user needs to truncate output to required length +static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) { + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 52, 51, 50, 48, 47, 46, 44, 43, + 42, 41, 40, 39, 38, 36, 35, 34, + 33, 32, 31, 30, 30, 29, 28, 27, + 26, 25, 24, 23, 23, 22, 21, 20, + 19, 19, 18, 17, 16, 16, 15, 14, + 14, 13, 12, 12, 11, 10, 10, 9, + 9, 8, 7, 7, 6, 6, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0, + 127, 125, 123, 121, 119, 118, 116, 114, + 113, 111, 109, 108, 106, 105, 103, 102, + 100, 99, 97, 96, 95, 93, 92, 91, + 90, 88, 87, 86, 
85, 84, 83, 82, + 80, 79, 78, 77, 76, 75, 74, 73, + 72, 71, 70, 70, 69, 68, 67, 66, + 65, 64, 63, 63, 62, 61, 60, 59, + 59, 58, 57, 56, 56, 55, 54, 53}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + } + + int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_rsqrte7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 5, 10, sub); + break; + } + + return uA.f; +} + +float32_t f32_rsqrte7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 8, 23, sub); + break; + } + + return uA.f; +} + +float64_t 
f64_rsqrte7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 11, 52, sub); + break; + } + + return uA.f; +} + +//user needs to truncate output to required length +static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub, + bool *round_abnormal) +{ + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 127, 125, 123, 121, 119, 117, 116, 114, + 112, 110, 109, 107, 105, 104, 102, 100, + 99, 97, 96, 94, 93, 91, 90, 88, + 87, 85, 84, 83, 81, 80, 79, 77, + 76, 75, 74, 72, 71, 70, 69, 68, + 66, 65, 64, 63, 62, 61, 60, 59, + 58, 57, 56, 55, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 36, + 35, 35, 34, 33, 32, 31, 31, 30, + 29, 28, 28, 27, 26, 25, 25, 24, + 23, 23, 22, 21, 21, 20, 19, 19, + 18, 17, 17, 16, 15, 15, 14, 14, + 13, 12, 12, 11, 11, 10, 9, 9, + 8, 8, 7, 7, 6, 5, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + + if (exp != 0 && exp != UINT64_MAX) { + *round_abnormal = true; + if (rm == 1 || + (rm == 2 && !sign) || + (rm == 3 && sign)) + return ((sign << (s+e)) | make_mask64(s, e)) - 1; + else + return (sign << (s+e)) | make_mask64(s, e); + } + } + + int 
idx = sig >> (s-p); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp; + if (out_exp == 0 || out_exp == UINT64_MAX) { + out_sig = (out_sig >> 1) | make_mask64(s - 1, 1); + if (out_exp == UINT64_MAX) { + out_sig >>= 1; + out_exp = 0; + } + } + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_recip7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 5, 10, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float32_t f32_recip7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x80000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = 
recip7(uA.ui, 8, 23, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float64_t f64_recip7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000000000000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 11, 52, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} diff --git a/softfloat/platform.h b/softfloat/platform.h index 03dd429faf..55de1941a7 100644 --- a/softfloat/platform.h +++ b/softfloat/platform.h @@ -36,11 +36,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ +#include "config.h" +#ifndef WORDS_BIGENDIAN #define LITTLEENDIAN 1 +#endif #define INLINE_LEVEL 5 #define SOFTFLOAT_FAST_INT64 #define SOFTFLOAT_FAST_DIV64TO32 +#define SOFTFLOAT_ROUND_ODD /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index b277281ec2..bdac1be263 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -141,8 +141,12 @@ void i64_to_f128M( int64_t, float128_t * ); /*---------------------------------------------------------------------------- | 16-bit (half-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast8_t f16_to_ui8( float16_t, uint_fast8_t, bool ); +uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool ); uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool ); +int_fast8_t f16_to_i8( float16_t, uint_fast8_t, bool ); +int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool ); int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool ); int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool ); @@ -160,6 +164,8 @@ void f16_to_f128M( float16_t, float128_t * ); float16_t f16_roundToInt( float16_t, uint_fast8_t, bool ); float16_t f16_add( float16_t, float16_t ); float16_t f16_sub( float16_t, float16_t ); +float16_t f16_max( float16_t, float16_t ); +float16_t f16_min( float16_t, float16_t ); float16_t f16_mul( float16_t, float16_t ); float16_t f16_mulAdd( float16_t, float16_t, float16_t ); float16_t f16_div( float16_t, float16_t ); @@ -172,12 +178,17 @@ bool f16_eq_signaling( float16_t, float16_t ); bool f16_le_quiet( 
float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); +uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); +float16_t f16_recip7( float16_t ); /*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool ); +int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool ); int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool ); int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); @@ -195,6 +206,8 @@ void f32_to_f128M( float32_t, float128_t * ); float32_t f32_roundToInt( float32_t, uint_fast8_t, bool ); float32_t f32_add( float32_t, float32_t ); float32_t f32_sub( float32_t, float32_t ); +float32_t f32_max( float32_t, float32_t ); +float32_t f32_min( float32_t, float32_t ); float32_t f32_mul( float32_t, float32_t ); float32_t f32_mulAdd( float32_t, float32_t, float32_t ); float32_t f32_div( float32_t, float32_t ); @@ -208,6 +221,8 @@ bool f32_le_quiet( float32_t, float32_t ); bool f32_lt_quiet( float32_t, float32_t ); bool f32_isSignalingNaN( float32_t ); uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); /*---------------------------------------------------------------------------- | 64-bit (double-precision) floating-point operations. 
@@ -231,6 +246,8 @@ void f64_to_f128M( float64_t, float128_t * ); float64_t f64_roundToInt( float64_t, uint_fast8_t, bool ); float64_t f64_add( float64_t, float64_t ); float64_t f64_sub( float64_t, float64_t ); +float64_t f64_max( float64_t, float64_t ); +float64_t f64_min( float64_t, float64_t ); float64_t f64_mul( float64_t, float64_t ); float64_t f64_mulAdd( float64_t, float64_t, float64_t ); float64_t f64_div( float64_t, float64_t ); @@ -244,6 +261,8 @@ bool f64_le_quiet( float64_t, float64_t ); bool f64_lt_quiet( float64_t, float64_t ); bool f64_isSignalingNaN( float64_t ); uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); /*---------------------------------------------------------------------------- | Rounding precision for 80-bit extended double-precision floating-point. diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index ff7637b13e..07dca1618a 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -38,6 +38,7 @@ softfloat_c_srcs = \ f128_to_ui64.c \ f128_to_ui64_r_minMag.c \ f16_add.c \ + f16_classify.c \ f16_div.c \ f16_eq.c \ f16_eq_signaling.c \ @@ -55,10 +56,14 @@ softfloat_c_srcs = \ f16_to_f128.c \ f16_to_f32.c \ f16_to_f64.c \ + f16_to_i8.c \ + f16_to_i16.c \ f16_to_i32.c \ f16_to_i32_r_minMag.c \ f16_to_i64.c \ f16_to_i64_r_minMag.c \ + f16_to_ui8.c \ + f16_to_ui16.c \ f16_to_ui32.c \ f16_to_ui32_r_minMag.c \ f16_to_ui64.c \ @@ -82,10 +87,12 @@ softfloat_c_srcs = \ f32_to_f128.c \ f32_to_f16.c \ f32_to_f64.c \ + f32_to_i16.c \ f32_to_i32.c \ f32_to_i32_r_minMag.c \ f32_to_i64.c \ f32_to_i64_r_minMag.c \ + f32_to_ui16.c \ f32_to_ui32.c \ f32_to_ui32_r_minMag.c \ f32_to_ui64.c \ @@ -117,6 +124,8 @@ softfloat_c_srcs = \ f64_to_ui32_r_minMag.c \ f64_to_ui64.c \ f64_to_ui64_r_minMag.c \ + fall_maxmin.c \ + fall_reciprocal.c \ i32_to_f128.c \ i32_to_f16.c \ i32_to_f32.c \ @@ -225,6 +234,10 @@ softfloat_c_srcs = \ ui64_to_f32.c \ ui64_to_f64.c \ 
+softfloat_CFLAGS = -fPIC + +softfloat_install_shared_lib = yes + softfloat_test_srcs = softfloat_install_prog_srcs = diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 629d5185b9..556476c1a5 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -55,6 +55,20 @@ extern "C" { | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0xFFFFFFFF diff --git a/spike_main/spike-dasm.cc b/spike_dasm/spike-dasm.cc similarity index 70% rename from spike_main/spike-dasm.cc rename to spike_dasm/spike-dasm.cc index 1161825c2e..fa6a25ae6a 100644 --- a/spike_main/spike-dasm.cc +++ b/spike_dasm/spike-dasm.cc @@ -21,13 +21,32 @@ int main(int argc, char** argv) std::function extension; option_parser_t parser; +#ifdef HAVE_DLOPEN parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); +#endif parser.option(0, "isa", 1, [&](const char* s){isa = s;}); parser.parse(argv); - processor_t p(isa, 0, 0); - if (extension) - p.register_extension(extension()); + std::string lowercase; + for (const char *p = isa; *p; p++) + lowercase += std::tolower(*p); + + int xlen; + if (lowercase.compare(0, 4, "rv32") == 0) { + xlen = 32; + } else if (lowercase.compare(0, 4, "rv64") == 0) { + xlen = 64; + } else { + fprintf(stderr, "bad ISA string: %s\n", isa); + return 1; + } + + disassembler_t* disassembler = new disassembler_t(xlen); + if 
(extension) { + for (auto disasm_insn : extension()->get_disasms()) { + disassembler->add_insn(disasm_insn); + } + } while (getline(cin, s)) { @@ -52,7 +71,7 @@ int main(int argc, char** argv) if (nbits < 64) bits = bits << (64 - nbits) >> (64 - nbits); - string dis = p.get_disassembler()->disassemble(bits); + string dis = disassembler->disassemble(bits); s = s.substr(0, start) + dis + s.substr(endp - &s[0] + 1); pos = start + dis.length(); } diff --git a/spike_dasm/spike_dasm.ac b/spike_dasm/spike_dasm.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/spike_dasm/spike_dasm.mk.in b/spike_dasm/spike_dasm.mk.in new file mode 100644 index 0000000000..b6118fd5c6 --- /dev/null +++ b/spike_dasm/spike_dasm.mk.in @@ -0,0 +1,9 @@ +spike_dasm_subproject_deps = \ + disasm \ + $(if $(HAVE_DLOPEN),riscv,) \ + +spike_dasm_srcs = \ + spike_dasm_option_parser.cc \ + +spike_dasm_install_prog_srcs = \ + spike-dasm.cc \ diff --git a/spike_dasm/spike_dasm_option_parser.cc b/spike_dasm/spike_dasm_option_parser.cc new file mode 120000 index 0000000000..4244c15de1 --- /dev/null +++ b/spike_dasm/spike_dasm_option_parser.cc @@ -0,0 +1 @@ +../fesvr/option_parser.cc \ No newline at end of file diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc deleted file mode 100644 index 81264dd802..0000000000 --- a/spike_main/disasm.cc +++ /dev/null @@ -1,648 +0,0 @@ -// See LICENSE for license details. 
- -#include "disasm.h" -#include -#include -#include -#include -#include - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + ')'; - } -} load_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + ')'; - } -} store_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::string("(") + xpr_name[insn.rs1()] + ')'; - } -} amo_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rd()]; - } -} xrd; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rs1()]; - } -} xrs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rs2()]; - } -} xrs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rd()]; - } -} frd; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs1()]; - } -} frs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs2()]; - } -} frs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs3()]; - } -} frs3; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - switch (insn.csr()) - { - #define DECLARE_CSR(name, num) case num: return #name; - #include "encoding.h" - #undef DECLARE_CSR - default: - { - char buf[16]; - snprintf(buf, sizeof buf, "unknown_%03" PRIx64, insn.csr()); - return std::string(buf); - } - } - } -} csr; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.i_imm()); - } -} imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.shamt()); - } -} shamt; - -struct : public arg_t { 
- std::string to_string(insn_t insn) const { - std::stringstream s; - s << std::hex << "0x" << ((uint32_t)insn.u_imm() >> 12); - return s.str(); - } -} bigimm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string(insn.rs1()); - } -} zimm5; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.sb_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} branch_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.uj_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << std::hex << " 0x" << abs(target); - return s.str(); - } -} jump_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs1()]; - } -} rvc_rs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs2()]; - } -} rvc_rs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rvc_rs2()]; - } -} rvc_fp_rs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs1s()]; - } -} rvc_rs1s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs2s()]; - } -} rvc_rs2s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rvc_rs2s()]; - } -} rvc_fp_rs2s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[X_SP]; - } -} rvc_sp; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_imm()); - } -} rvc_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_addi4spn_imm()); - } -} rvc_addi4spn_imm; - -struct : public arg_t { - std::string 
to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_addi16sp_imm()); - } -} rvc_addi16sp_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lwsp_imm()); - } -} rvc_lwsp_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)(insn.rvc_imm() & 0x3f)); - } -} rvc_shamt; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - s << std::hex << "0x" << ((uint32_t)insn.rvc_imm() << 12 >> 12); - return s.str(); - } -} rvc_uimm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lwsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_lwsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_ldsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_ldsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_swsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_swsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_sdsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_sdsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lw_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; - } -} rvc_lw_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_ld_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; - } -} rvc_ld_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.rvc_b_imm(); - char sign = target >= 0 ? 
'+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} rvc_branch_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.rvc_j_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} rvc_jump_target; - -std::string disassembler_t::disassemble(insn_t insn) const -{ - const disasm_insn_t* disasm_insn = lookup(insn); - return disasm_insn ? disasm_insn->to_string(insn) : "unknown"; -} - -disassembler_t::disassembler_t(int xlen) -{ - const uint32_t mask_rd = 0x1fUL << 7; - const uint32_t match_rd_ra = 1UL << 7; - const uint32_t mask_rs1 = 0x1fUL << 15; - const uint32_t match_rs1_ra = 1UL << 15; - const uint32_t mask_rs2 = 0x1fUL << 20; - const uint32_t mask_imm = 0xfffUL << 20; - const uint32_t match_imm_1 = 1UL << 20; - const uint32_t mask_rvc_rs2 = 0x1fUL << 2; - const uint32_t mask_rvc_imm = mask_rvc_rs2 | 0x1000UL; - - #define DECLARE_INSN(code, match, mask) \ - const uint32_t match_##code = match; \ - const uint32_t mask_##code = mask; - #include "encoding.h" - #undef DECLARE_INSN - - // explicit per-instruction disassembly - #define DISASM_INSN(name, code, extra, ...) 
\ - add_insn(new disasm_insn_t(name, match_##code, mask_##code | (extra), __VA_ARGS__)); - #define DEFINE_NOARG(code) \ - add_insn(new disasm_insn_t(#code, match_##code, mask_##code, {})); - #define DEFINE_RTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2}) - #define DEFINE_ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &imm}) - #define DEFINE_ITYPE_SHIFT(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &shamt}) - #define DEFINE_I0TYPE(name, code) DISASM_INSN(name, code, mask_rs1, {&xrd, &imm}) - #define DEFINE_I1TYPE(name, code) DISASM_INSN(name, code, mask_imm, {&xrd, &xrs1}) - #define DEFINE_I2TYPE(name, code) DISASM_INSN(name, code, mask_rd | mask_imm, {&xrs1}) - #define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm}) - #define DEFINE_BTYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2, &branch_target}) - #define DEFINE_B0TYPE(name, code) DISASM_INSN(name, code, mask_rs1 | mask_rs2, {&branch_target}) - #define DEFINE_B1TYPE(name, code) DISASM_INSN(name, code, mask_rs2, {&xrs1, &branch_target}) - #define DEFINE_XLOAD(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address}) - #define DEFINE_XSTORE(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address}) - #define DEFINE_XAMO(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs2, &amo_address}) - #define DEFINE_XAMO_LR(code) DISASM_INSN(#code, code, 0, {&xrd, &amo_address}) - #define DEFINE_FLOAD(code) DISASM_INSN(#code, code, 0, {&frd, &load_address}) - #define DEFINE_FSTORE(code) DISASM_INSN(#code, code, 0, {&frs2, &store_address}) - #define DEFINE_FRTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2}) - #define DEFINE_FR1TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1}) - #define DEFINE_FR3TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2, &frs3}) - #define DEFINE_FXTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1}) - #define DEFINE_FX2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1, &frs2}) - #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, 
&xrs1}) - #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) - - DEFINE_XLOAD(lb) - DEFINE_XLOAD(lbu) - DEFINE_XLOAD(lh) - DEFINE_XLOAD(lhu) - DEFINE_XLOAD(lw) - DEFINE_XLOAD(lwu) - DEFINE_XLOAD(ld) - - DEFINE_XSTORE(sb) - DEFINE_XSTORE(sh) - DEFINE_XSTORE(sw) - DEFINE_XSTORE(sd) - - DEFINE_XAMO(amoadd_w) - DEFINE_XAMO(amoswap_w) - DEFINE_XAMO(amoand_w) - DEFINE_XAMO(amoor_w) - DEFINE_XAMO(amoxor_w) - DEFINE_XAMO(amomin_w) - DEFINE_XAMO(amomax_w) - DEFINE_XAMO(amominu_w) - DEFINE_XAMO(amomaxu_w) - DEFINE_XAMO(amoadd_d) - DEFINE_XAMO(amoswap_d) - DEFINE_XAMO(amoand_d) - DEFINE_XAMO(amoor_d) - DEFINE_XAMO(amoxor_d) - DEFINE_XAMO(amomin_d) - DEFINE_XAMO(amomax_d) - DEFINE_XAMO(amominu_d) - DEFINE_XAMO(amomaxu_d) - - DEFINE_XAMO_LR(lr_w) - DEFINE_XAMO(sc_w) - DEFINE_XAMO_LR(lr_d) - DEFINE_XAMO(sc_d) - - DEFINE_FLOAD(flw) - DEFINE_FLOAD(fld) - DEFINE_FLOAD(flq) - - DEFINE_FSTORE(fsw) - DEFINE_FSTORE(fsd) - DEFINE_FSTORE(fsq) - - add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); - add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); - add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); - - DEFINE_B1TYPE("beqz", beq); - DEFINE_B1TYPE("bnez", bne); - DEFINE_B1TYPE("bltz", blt); - DEFINE_B1TYPE("bgez", bge); - DEFINE_BTYPE(beq) - DEFINE_BTYPE(bne) - DEFINE_BTYPE(blt) - DEFINE_BTYPE(bge) - DEFINE_BTYPE(bltu) - DEFINE_BTYPE(bgeu) - - DEFINE_LTYPE(lui); - DEFINE_LTYPE(auipc); - - add_insn(new disasm_insn_t("ret", match_jalr | match_rs1_ra, mask_jalr | mask_rd | mask_rs1 | mask_imm, {})); - DEFINE_I2TYPE("jr", jalr); - add_insn(new disasm_insn_t("jalr", match_jalr | match_rd_ra, mask_jalr | mask_rd | mask_imm, {&xrs1})); - DEFINE_ITYPE(jalr); - - add_insn(new disasm_insn_t("nop", match_addi, mask_addi | mask_rd | mask_rs1 | mask_imm, {})); - add_insn(new disasm_insn_t(" - ", match_xor, mask_xor | mask_rd | mask_rs1 | mask_rs2, {})); // for 
machine-generated bubbles - DEFINE_I0TYPE("li", addi); - DEFINE_I1TYPE("mv", addi); - DEFINE_ITYPE(addi); - DEFINE_ITYPE(slti); - add_insn(new disasm_insn_t("seqz", match_sltiu | match_imm_1, mask_sltiu | mask_imm, {&xrd, &xrs1})); - DEFINE_ITYPE(sltiu); - add_insn(new disasm_insn_t("not", match_xori | mask_imm, mask_xori | mask_imm, {&xrd, &xrs1})); - DEFINE_ITYPE(xori); - - DEFINE_ITYPE_SHIFT(slli); - DEFINE_ITYPE_SHIFT(srli); - DEFINE_ITYPE_SHIFT(srai); - - DEFINE_ITYPE(ori); - DEFINE_ITYPE(andi); - DEFINE_I1TYPE("sext.w", addiw); - DEFINE_ITYPE(addiw); - - DEFINE_ITYPE_SHIFT(slliw); - DEFINE_ITYPE_SHIFT(srliw); - DEFINE_ITYPE_SHIFT(sraiw); - - DEFINE_RTYPE(add); - DEFINE_RTYPE(sub); - DEFINE_RTYPE(sll); - DEFINE_RTYPE(slt); - add_insn(new disasm_insn_t("snez", match_sltu, mask_sltu | mask_rs1, {&xrd, &xrs2})); - DEFINE_RTYPE(sltu); - DEFINE_RTYPE(xor); - DEFINE_RTYPE(srl); - DEFINE_RTYPE(sra); - DEFINE_RTYPE(or); - DEFINE_RTYPE(and); - DEFINE_RTYPE(mul); - DEFINE_RTYPE(mulh); - DEFINE_RTYPE(mulhu); - DEFINE_RTYPE(mulhsu); - DEFINE_RTYPE(div); - DEFINE_RTYPE(divu); - DEFINE_RTYPE(rem); - DEFINE_RTYPE(remu); - DEFINE_RTYPE(addw); - DEFINE_RTYPE(subw); - DEFINE_RTYPE(sllw); - DEFINE_RTYPE(srlw); - DEFINE_RTYPE(sraw); - DEFINE_RTYPE(mulw); - DEFINE_RTYPE(divw); - DEFINE_RTYPE(divuw); - DEFINE_RTYPE(remw); - DEFINE_RTYPE(remuw); - - DEFINE_NOARG(ecall); - DEFINE_NOARG(ebreak); - DEFINE_NOARG(uret); - DEFINE_NOARG(sret); - DEFINE_NOARG(mret); - DEFINE_NOARG(dret); - DEFINE_NOARG(wfi); - DEFINE_NOARG(fence); - DEFINE_NOARG(fence_i); - DEFINE_SFENCE_TYPE(sfence_vma); - - add_insn(new disasm_insn_t("csrr", match_csrrs, mask_csrrs | mask_rs1, {&xrd, &csr})); - add_insn(new disasm_insn_t("csrw", match_csrrw, mask_csrrw | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrs", match_csrrs, mask_csrrs | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrc", match_csrrc, mask_csrrc | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrwi", match_csrrwi, 
mask_csrrwi | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrsi", match_csrrsi, mask_csrrsi | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrci", match_csrrci, mask_csrrci | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrrw", match_csrrw, mask_csrrw, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrs", match_csrrs, mask_csrrs, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrc", match_csrrc, mask_csrrc, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrwi", match_csrrwi, mask_csrrwi, {&xrd, &csr, &zimm5})); - add_insn(new disasm_insn_t("csrrsi", match_csrrsi, mask_csrrsi, {&xrd, &csr, &zimm5})); - add_insn(new disasm_insn_t("csrrci", match_csrrci, mask_csrrci, {&xrd, &csr, &zimm5})); - - DEFINE_FRTYPE(fadd_s); - DEFINE_FRTYPE(fsub_s); - DEFINE_FRTYPE(fmul_s); - DEFINE_FRTYPE(fdiv_s); - DEFINE_FR1TYPE(fsqrt_s); - DEFINE_FRTYPE(fmin_s); - DEFINE_FRTYPE(fmax_s); - DEFINE_FR3TYPE(fmadd_s); - DEFINE_FR3TYPE(fmsub_s); - DEFINE_FR3TYPE(fnmadd_s); - DEFINE_FR3TYPE(fnmsub_s); - DEFINE_FRTYPE(fsgnj_s); - DEFINE_FRTYPE(fsgnjn_s); - DEFINE_FRTYPE(fsgnjx_s); - DEFINE_FR1TYPE(fcvt_s_d); - DEFINE_FR1TYPE(fcvt_s_q); - DEFINE_XFTYPE(fcvt_s_l); - DEFINE_XFTYPE(fcvt_s_lu); - DEFINE_XFTYPE(fcvt_s_w); - DEFINE_XFTYPE(fcvt_s_wu); - DEFINE_XFTYPE(fcvt_s_wu); - DEFINE_XFTYPE(fmv_w_x); - DEFINE_FXTYPE(fcvt_l_s); - DEFINE_FXTYPE(fcvt_lu_s); - DEFINE_FXTYPE(fcvt_w_s); - DEFINE_FXTYPE(fcvt_wu_s); - DEFINE_FXTYPE(fclass_s); - DEFINE_FXTYPE(fmv_x_w); - DEFINE_FX2TYPE(feq_s); - DEFINE_FX2TYPE(flt_s); - DEFINE_FX2TYPE(fle_s); - - DEFINE_FRTYPE(fadd_d); - DEFINE_FRTYPE(fsub_d); - DEFINE_FRTYPE(fmul_d); - DEFINE_FRTYPE(fdiv_d); - DEFINE_FR1TYPE(fsqrt_d); - DEFINE_FRTYPE(fmin_d); - DEFINE_FRTYPE(fmax_d); - DEFINE_FR3TYPE(fmadd_d); - DEFINE_FR3TYPE(fmsub_d); - DEFINE_FR3TYPE(fnmadd_d); - DEFINE_FR3TYPE(fnmsub_d); - DEFINE_FRTYPE(fsgnj_d); - DEFINE_FRTYPE(fsgnjn_d); - DEFINE_FRTYPE(fsgnjx_d); - DEFINE_FR1TYPE(fcvt_d_s); - 
DEFINE_FR1TYPE(fcvt_d_q); - DEFINE_XFTYPE(fcvt_d_l); - DEFINE_XFTYPE(fcvt_d_lu); - DEFINE_XFTYPE(fcvt_d_w); - DEFINE_XFTYPE(fcvt_d_wu); - DEFINE_XFTYPE(fcvt_d_wu); - DEFINE_XFTYPE(fmv_d_x); - DEFINE_FXTYPE(fcvt_l_d); - DEFINE_FXTYPE(fcvt_lu_d); - DEFINE_FXTYPE(fcvt_w_d); - DEFINE_FXTYPE(fcvt_wu_d); - DEFINE_FXTYPE(fclass_d); - DEFINE_FXTYPE(fmv_x_d); - DEFINE_FX2TYPE(feq_d); - DEFINE_FX2TYPE(flt_d); - DEFINE_FX2TYPE(fle_d); - - DEFINE_FRTYPE(fadd_q); - DEFINE_FRTYPE(fsub_q); - DEFINE_FRTYPE(fmul_q); - DEFINE_FRTYPE(fdiv_q); - DEFINE_FR1TYPE(fsqrt_q); - DEFINE_FRTYPE(fmin_q); - DEFINE_FRTYPE(fmax_q); - DEFINE_FR3TYPE(fmadd_q); - DEFINE_FR3TYPE(fmsub_q); - DEFINE_FR3TYPE(fnmadd_q); - DEFINE_FR3TYPE(fnmsub_q); - DEFINE_FRTYPE(fsgnj_q); - DEFINE_FRTYPE(fsgnjn_q); - DEFINE_FRTYPE(fsgnjx_q); - DEFINE_FR1TYPE(fcvt_q_s); - DEFINE_FR1TYPE(fcvt_q_d); - DEFINE_XFTYPE(fcvt_q_l); - DEFINE_XFTYPE(fcvt_q_lu); - DEFINE_XFTYPE(fcvt_q_w); - DEFINE_XFTYPE(fcvt_q_wu); - DEFINE_XFTYPE(fcvt_q_wu); - DEFINE_XFTYPE(fmv_q_x); - DEFINE_FXTYPE(fcvt_l_q); - DEFINE_FXTYPE(fcvt_lu_q); - DEFINE_FXTYPE(fcvt_w_q); - DEFINE_FXTYPE(fcvt_wu_q); - DEFINE_FXTYPE(fclass_q); - DEFINE_FXTYPE(fmv_x_q); - DEFINE_FX2TYPE(feq_q); - DEFINE_FX2TYPE(flt_q); - DEFINE_FX2TYPE(fle_q); - - DISASM_INSN("c.ebreak", c_add, mask_rd | mask_rvc_rs2, {}); - add_insn(new disasm_insn_t("ret", match_c_jr | match_rd_ra, mask_c_jr | mask_rd | mask_rvc_imm, {})); - DISASM_INSN("c.jr", c_jr, mask_rvc_imm, {&rvc_rs1}); - DISASM_INSN("c.jalr", c_jalr, mask_rvc_imm, {&rvc_rs1}); - DISASM_INSN("c.nop", c_addi, mask_rd | mask_rvc_imm, {}); - DISASM_INSN("c.addi16sp", c_addi16sp, mask_rd, {&rvc_sp, &rvc_addi16sp_imm}); - DISASM_INSN("c.addi4spn", c_addi4spn, 0, {&rvc_rs2s, &rvc_sp, &rvc_addi4spn_imm}); - DISASM_INSN("c.li", c_li, 0, {&xrd, &rvc_imm}); - DISASM_INSN("c.lui", c_lui, 0, {&xrd, &rvc_uimm}); - DISASM_INSN("c.addi", c_addi, 0, {&xrd, &rvc_imm}); - DISASM_INSN("c.slli", c_slli, 0, {&rvc_rs1, &rvc_shamt}); - 
DISASM_INSN("c.srli", c_srli, 0, {&rvc_rs1s, &rvc_shamt}); - DISASM_INSN("c.srai", c_srai, 0, {&rvc_rs1s, &rvc_shamt}); - DISASM_INSN("c.andi", c_andi, 0, {&rvc_rs1s, &rvc_imm}); - DISASM_INSN("c.mv", c_mv, 0, {&xrd, &rvc_rs2}); - DISASM_INSN("c.add", c_add, 0, {&xrd, &rvc_rs2}); - DISASM_INSN("c.addw", c_addw, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.sub", c_sub, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.subw", c_subw, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.and", c_and, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.or", c_or, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.xor", c_xor, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.lwsp", c_lwsp, 0, {&xrd, &rvc_lwsp_address}); - DISASM_INSN("c.fld", c_fld, 0, {&rvc_fp_rs2s, &rvc_ld_address}); - DISASM_INSN("c.swsp", c_swsp, 0, {&rvc_rs2, &rvc_swsp_address}); - DISASM_INSN("c.lw", c_lw, 0, {&rvc_rs2s, &rvc_lw_address}); - DISASM_INSN("c.sw", c_sw, 0, {&rvc_rs2s, &rvc_lw_address}); - DISASM_INSN("c.beqz", c_beqz, 0, {&rvc_rs1s, &rvc_branch_target}); - DISASM_INSN("c.bnez", c_bnez, 0, {&rvc_rs1s, &rvc_branch_target}); - DISASM_INSN("c.j", c_j, 0, {&rvc_jump_target}); - DISASM_INSN("c.fldsp", c_fldsp, 0, {&rvc_fp_rs2s, &rvc_ldsp_address}); - DISASM_INSN("c.fsd", c_fsd, 0, {&rvc_fp_rs2s, &rvc_ld_address}); - DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2s, &rvc_sdsp_address}); - - if (xlen == 32) { - DISASM_INSN("c.flw", c_flw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); - DISASM_INSN("c.flwsp", c_flwsp, 0, {&frd, &rvc_lwsp_address}); - DISASM_INSN("c.fsw", c_fsw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); - DISASM_INSN("c.fswsp", c_fswsp, 0, {&rvc_fp_rs2, &rvc_swsp_address}); - DISASM_INSN("c.jal", c_jal, 0, {&rvc_jump_target}); - } else { - DISASM_INSN("c.ld", c_ld, 0, {&rvc_rs2s, &rvc_ld_address}); - DISASM_INSN("c.ldsp", c_ldsp, 0, {&xrd, &rvc_ldsp_address}); - DISASM_INSN("c.sd", c_sd, 0, {&rvc_rs2s, &rvc_ld_address}); - DISASM_INSN("c.sdsp", c_sdsp, 0, {&rvc_rs2, &rvc_sdsp_address}); - DISASM_INSN("c.addiw", c_addiw, 
0, {&xrd, &rvc_imm}); - } - - // provide a default disassembly for all instructions as a fallback - #define DECLARE_INSN(code, match, mask) \ - add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); - #include "encoding.h" - #undef DECLARE_INSN -} - -const disasm_insn_t* disassembler_t::lookup(insn_t insn) const -{ - size_t idx = insn.bits() % HASH_SIZE; - for (size_t j = 0; j < chain[idx].size(); j++) - if(*chain[idx][j] == insn) - return chain[idx][j]; - - idx = HASH_SIZE; - for (size_t j = 0; j < chain[idx].size(); j++) - if(*chain[idx][j] == insn) - return chain[idx][j]; - - return NULL; -} - -void disassembler_t::add_insn(disasm_insn_t* insn) -{ - size_t idx = HASH_SIZE; - if (insn->get_mask() % HASH_SIZE == HASH_SIZE - 1) - idx = insn->get_match() % HASH_SIZE; - chain[idx].push_back(insn); -} - -disassembler_t::~disassembler_t() -{ - for (size_t i = 0; i < HASH_SIZE+1; i++) - for (size_t j = 0; j < chain[i].size(); j++) - delete chain[i][j]; -} diff --git a/spike_main/spike-log-parser.cc b/spike_main/spike-log-parser.cc new file mode 100644 index 0000000000..d174afc6ba --- /dev/null +++ b/spike_main/spike-log-parser.cc @@ -0,0 +1,60 @@ +// See LICENSE for license details. + +// This little program finds occurrences of strings like +// core 0: 0x000000008000c36c (0xfe843783) ld a5, -24(s0) +// in its inputs, then output the RISC-V instruction with the disassembly +// enclosed hexadecimal number. 
+ +#include +#include +#include +#include +#include "fesvr/option_parser.h" + +#include "disasm.h" +#include "extension.h" + +using namespace std; + +int main(int argc, char** argv) +{ + string s; + const char* isa = DEFAULT_ISA; + + std::function extension; + option_parser_t parser; + parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); + parser.option(0, "isa", 1, [&](const char* s){isa = s;}); + parser.parse(argv); + + processor_t p(isa, DEFAULT_PRIV, DEFAULT_VARCH, 0, 0, false, nullptr); + if (extension) { + p.register_extension(extension()); + } + + std::regex reg("^core\\s+\\d+:\\s+0x[0-9a-f]+\\s+\\(0x([0-9a-f]+)\\)", std::regex_constants::icase); + std::smatch m; + std::ssub_match sm ; + + while (getline(cin,s)){ + if (regex_search(s, m, reg)){ + // the opcode string + string op = m[1].str(); + uint32_t bit_num = op.size() * 4; + uint64_t opcode = strtoull(op.c_str(), nullptr, 16); + + if (bit_num<64){ + opcode = opcode << (64-bit_num) >> (64-bit_num); + } + + const disasm_insn_t* disasm = p.get_disassembler()->lookup(opcode); + if (disasm) { + cout << disasm->get_name() << '\n'; + } else { + cout << "unknown_op\n"; + } + } + } + + return 0; +} diff --git a/spike_main/spike.cc b/spike_main/spike.cc index d95ce469c5..89bf915103 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -12,9 +12,12 @@ #include #include #include +#include +#include "../VERSION" -static void help() +static void help(int exit_code = 1) { + fprintf(stderr, "Spike RISC-V ISA Simulator " SPIKE_VERSION "\n\n"); fprintf(stderr, "usage: spike [host options] [target options]\n"); fprintf(stderr, "Host Options:\n"); fprintf(stderr, " -p Simulate processors [default 1]\n"); @@ -24,28 +27,113 @@ static void help() fprintf(stderr, " -d Interactive debug mode\n"); fprintf(stderr, " -g Track histogram of PCs\n"); fprintf(stderr, " -l Generate a log of execution\n"); - fprintf(stderr, " -h Print this help message\n"); + fprintf(stderr, " -h, --help Print 
this help message\n"); fprintf(stderr, " -H Start halted, allowing a debugger to connect\n"); fprintf(stderr, " --isa= RISC-V ISA string [default %s]\n", DEFAULT_ISA); + fprintf(stderr, " --priv= RISC-V privilege modes supported [default %s]\n", DEFAULT_PRIV); + fprintf(stderr, " --varch= RISC-V Vector uArch string [default %s]\n", DEFAULT_VARCH); fprintf(stderr, " --pc=
Override ELF entry point\n"); fprintf(stderr, " --hartids= Explicitly specify hartids, default is 0,1,...\n"); fprintf(stderr, " --ic=:: Instantiate a cache model with S sets,\n"); fprintf(stderr, " --dc=:: W ways, and B-byte blocks (with S and\n"); fprintf(stderr, " --l2=:: B both powers of 2).\n"); + fprintf(stderr, " --device= Attach MMIO plugin device from an --extlib library\n"); + fprintf(stderr, " P -- Name of the MMIO plugin\n"); + fprintf(stderr, " B -- Base memory address of the device\n"); + fprintf(stderr, " A -- String arguments to pass to the plugin\n"); + fprintf(stderr, " This flag can be used multiple times.\n"); + fprintf(stderr, " The extlib flag for the library must come first.\n"); fprintf(stderr, " --log-cache-miss Generate a log of cache miss\n"); fprintf(stderr, " --extension= Specify RoCC Extension\n"); fprintf(stderr, " --extlib= Shared library to load\n"); + fprintf(stderr, " This flag can be used multiple times.\n"); fprintf(stderr, " --rbb-port= Listen on for remote bitbang connection\n"); fprintf(stderr, " --dump-dts Print device tree string and exit\n"); - fprintf(stderr, " --ust-trace= Write UST trace file\n"); fprintf(stderr, " --disable-dtb Don't write the device tree blob into memory\n"); - fprintf(stderr, " --progsize= Progsize for the debug module [default 2]\n"); - fprintf(stderr, " --debug-sba= Debug bus master supports up to " + fprintf(stderr, " --kernel= Load kernel flat image into memory\n"); + fprintf(stderr, " --initrd= Load kernel initrd into memory\n"); + fprintf(stderr, " --bootargs= Provide custom bootargs for kernel [default: console=hvc0 earlycon=sbi]\n"); + fprintf(stderr, " --real-time-clint Increment clint time at real-time rate\n"); + fprintf(stderr, " --dm-progsize= Progsize for the debug module [default 2]\n"); + fprintf(stderr, " --dm-sba= Debug bus master supports up to " " wide accesses [default 0]\n"); - fprintf(stderr, " --debug-auth Debug module requires debugger to authenticate\n"); + fprintf(stderr, " 
--dm-auth Debug module requires debugger to authenticate\n"); + fprintf(stderr, " --dmi-rti= Number of Run-Test/Idle cycles " + "required for a DMI access [default 0]\n"); + fprintf(stderr, " --dm-abstract-rti= Number of Run-Test/Idle cycles " + "required for an abstract command to execute [default 0]\n"); + fprintf(stderr, " --dm-no-hasel Debug module supports hasel\n"); + fprintf(stderr, " --dm-no-abstract-csr Debug module won't support abstract to authenticate\n"); + fprintf(stderr, " --dm-no-halt-groups Debug module won't support halt groups\n"); + fprintf(stderr, " --dm-no-impebreak Debug module won't support implicit ebreak in program buffer\n"); + + exit(exit_code); +} + +static void suggest_help() +{ + fprintf(stderr, "Try 'spike --help' for more information.\n"); exit(1); } +static bool check_file_exists(const char *fileName) +{ + std::ifstream infile(fileName); + return infile.good(); +} + +static std::ifstream::pos_type get_file_size(const char *filename) +{ + std::ifstream in(filename, std::ios::ate | std::ios::binary); + return in.tellg(); +} + +static void read_file_bytes(const char *filename,size_t fileoff, + char *read_buf, size_t read_sz) +{ + std::ifstream in(filename, std::ios::in | std::ios::binary); + in.seekg(fileoff, std::ios::beg); + in.read(read_buf, read_sz); +} + +bool sort_mem_region(const std::pair &a, + const std::pair &b) +{ + if (a.first == b.first) + return (a.second->size() < b.second->size()); + else + return (a.first < b.first); +} + +void merge_overlapping_memory_regions(std::vector>& mems) +{ + // check the user specified memory regions and merge the overlapping or + // eliminate the containing parts + std::sort(mems.begin(), mems.end(), sort_mem_region); + reg_t start_page = 0, end_page = 0; + std::vector>::reverse_iterator it = mems.rbegin(); + std::vector>::reverse_iterator _it = mems.rbegin(); + for(; it != mems.rend(); ++it) { + reg_t _start_page = it->first/PGSIZE; + reg_t _end_page = _start_page + 
it->second->size()/PGSIZE; + if (_start_page >= start_page && _end_page <= end_page) { + // contains + mems.erase(std::next(it).base()); + }else if ( _start_page < start_page && _end_page > start_page) { + // overlapping + _it->first = _start_page; + if (_end_page > end_page) + end_page = _end_page; + mems.erase(std::next(it).base()); + }else { + _it = it; + start_page = _start_page; + end_page = _end_page; + assert(start_page < end_page); + } + } +} + static std::vector> make_mems(const char* arg) { // handle legacy mem argument @@ -65,8 +153,23 @@ static std::vector> make_mems(const char* arg) if (!*p || *p != ':') help(); auto size = strtoull(p + 1, &p, 0); - if ((size | base) % PGSIZE != 0) + + // page-align base and size + auto base0 = base, size0 = size; + size += base0 % PGSIZE; + base -= base0 % PGSIZE; + if (size % PGSIZE != 0) + size += PGSIZE - size % PGSIZE; + + if (base + size < base) help(); + + if (size != size0) { + fprintf(stderr, "Warning: the memory at [0x%llX, 0x%llX] has been realigned\n" + "to the %ld KiB page size: [0x%llX, 0x%llX]\n", + base0, base0 + size0 - 1, PGSIZE / 1024, base, base + size - 1); + } + res.push_back(std::make_pair(reg_t(base), new mem_t(size))); if (!*p) break; @@ -74,6 +177,8 @@ static std::vector> make_mems(const char* arg) help(); arg = p + 1; } + + merge_overlapping_memory_regions(res); return res; } @@ -85,21 +190,41 @@ int main(int argc, char** argv) bool log = false; bool dump_dts = false; bool dtb_enabled = true; + bool real_time_clint = false; size_t nprocs = 1; + const char* kernel = NULL; + reg_t kernel_offset, kernel_size; + size_t initrd_size; + reg_t initrd_start = 0, initrd_end = 0; + const char* bootargs = NULL; reg_t start_pc = reg_t(-1); std::vector> mems; + std::vector> plugin_devices; std::unique_ptr ic; std::unique_ptr dc; std::unique_ptr l2; bool log_cache = false; + bool log_commits = false; + const char *log_path = nullptr; std::function extension; + const char* initrd = NULL; const char* isa = 
DEFAULT_ISA; - const char* ust_file = NULL; + const char* priv = DEFAULT_PRIV; + const char* varch = DEFAULT_VARCH; + const char* dtb_file = NULL; uint16_t rbb_port = 0; bool use_rbb = false; - unsigned progsize = 2; - unsigned max_bus_master_bits = 0; - bool require_authentication = false; + unsigned dmi_rti = 0; + debug_module_config_t dm_config = { + .progbufsize = 2, + .max_bus_master_bits = 0, + .require_authentication = false, + .abstract_rti = 0, + .support_hasel = true, + .support_abstract_csr_access = true, + .support_haltgroups = true, + .support_impebreak = true + }; std::vector hartids; auto const hartids_parser = [&](const char *s) { @@ -114,9 +239,52 @@ int main(int argc, char** argv) } }; + auto const device_parser = [&plugin_devices](const char *s) { + const std::string str(s); + std::istringstream stream(str); + + // We are parsing a string like name,base,args. + + // Parse the name, which is simply all of the characters leading up to the + // first comma. The validity of the plugin name will be checked later. + std::string name; + std::getline(stream, name, ','); + if (name.empty()) { + throw std::runtime_error("Plugin name is empty."); + } + + // Parse the base address. First, get all of the characters up to the next + // comma (or up to the end of the string if there is no comma). Then try to + // parse that string as an integer according to the rules of strtoull. It + // could be in decimal, hex, or octal. Fail if we were able to parse a + // number but there were garbage characters after the valid number. We must + // consume the entire string between the commas. 
+ std::string base_str; + std::getline(stream, base_str, ','); + if (base_str.empty()) { + throw std::runtime_error("Device base address is empty."); + } + char* end; + reg_t base = static_cast(strtoull(base_str.c_str(), &end, 0)); + if (end != &*base_str.cend()) { + throw std::runtime_error("Error parsing device base address."); + } + + // The remainder of the string is the arguments. We could use getline, but + // that could ignore newline characters in the arguments. That should be + // rare and discouraged, but handle it here anyway with this weird in_avail + // technique. The arguments are optional, so if there were no arguments + // specified we could end up with an empty string here. That's okay. + auto avail = stream.rdbuf()->in_avail(); + std::string args(avail, '\0'); + stream.readsome(&args[0], avail); + + plugin_devices.emplace_back(base, new mmio_plugin_device_t(name, args)); + }; + option_parser_t parser; - parser.help(&help); - parser.option('h', 0, 0, [&](const char* s){help();}); + parser.help(&suggest_help); + parser.option('h', "help", 0, [&](const char* s){help(0);}); parser.option('d', 0, 0, [&](const char* s){debug = true;}); parser.option('g', 0, 0, [&](const char* s){histogram = true;}); parser.option('l', 0, 0, [&](const char* s){log = true;}); @@ -132,10 +300,17 @@ int main(int argc, char** argv) parser.option(0, "l2", 1, [&](const char* s){l2.reset(cache_sim_t::construct(s, "L2$"));}); parser.option(0, "log-cache-miss", 0, [&](const char* s){log_cache = true;}); parser.option(0, "isa", 1, [&](const char* s){isa = s;}); + parser.option(0, "priv", 1, [&](const char* s){priv = s;}); + parser.option(0, "varch", 1, [&](const char* s){varch = s;}); + parser.option(0, "device", 1, device_parser); parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); parser.option(0, "dump-dts", 0, [&](const char *s){dump_dts = true;}); - parser.option(0, "ust-trace", 1, [&](const char *s){ust_file = s;}); parser.option(0, 
"disable-dtb", 0, [&](const char *s){dtb_enabled = false;}); + parser.option(0, "dtb", 1, [&](const char *s){dtb_file = s;}); + parser.option(0, "kernel", 1, [&](const char* s){kernel = s;}); + parser.option(0, "initrd", 1, [&](const char* s){initrd = s;}); + parser.option(0, "bootargs", 1, [&](const char* s){bootargs = s;}); + parser.option(0, "real-time-clint", 0, [&](const char *s){real_time_clint = true;}); parser.option(0, "extlib", 1, [&](const char *s){ void *lib = dlopen(s, RTLD_NOW | RTLD_GLOBAL); if (lib == NULL) { @@ -143,11 +318,28 @@ int main(int argc, char** argv) exit(-1); } }); - parser.option(0, "progsize", 1, [&](const char* s){progsize = atoi(s);}); - parser.option(0, "debug-sba", 1, - [&](const char* s){max_bus_master_bits = atoi(s);}); - parser.option(0, "debug-auth", 0, - [&](const char* s){require_authentication = true;}); + parser.option(0, "dm-progsize", 1, + [&](const char* s){dm_config.progbufsize = atoi(s);}); + parser.option(0, "dm-no-impebreak", 0, + [&](const char* s){dm_config.support_impebreak = false;}); + parser.option(0, "dm-sba", 1, + [&](const char* s){dm_config.max_bus_master_bits = atoi(s);}); + parser.option(0, "dm-auth", 0, + [&](const char* s){dm_config.require_authentication = true;}); + parser.option(0, "dmi-rti", 1, + [&](const char* s){dmi_rti = atoi(s);}); + parser.option(0, "dm-abstract-rti", 1, + [&](const char* s){dm_config.abstract_rti = atoi(s);}); + parser.option(0, "dm-no-hasel", 0, + [&](const char* s){dm_config.support_hasel = false;}); + parser.option(0, "dm-no-abstract-csr", 0, + [&](const char* s){dm_config.support_abstract_csr_access = false;}); + parser.option(0, "dm-no-halt-groups", 0, + [&](const char* s){dm_config.support_haltgroups = false;}); + parser.option(0, "log-commits", 0, + [&](const char* s){log_commits = true;}); + parser.option(0, "log", 1, + [&](const char* s){log_path = s;}); auto argv1 = parser.parse(argv); std::vector htif_args(argv1, (const char*const*)argv + argc); @@ -157,15 +349,42 
@@ int main(int argc, char** argv) if (!*argv1) help(); - sim_t s(isa, nprocs, halted, start_pc, mems, htif_args, std::move(hartids), - progsize, max_bus_master_bits, require_authentication); + if (kernel && check_file_exists(kernel)) { + kernel_size = get_file_size(kernel); + if (isa[2] == '6' && isa[3] == '4') + kernel_offset = 0x200000; + else + kernel_offset = 0x400000; + for (auto& m : mems) { + if (kernel_size && (kernel_offset + kernel_size) < m.second->size()) { + read_file_bytes(kernel, 0, m.second->contents() + kernel_offset, kernel_size); + break; + } + } + } + + if (initrd && check_file_exists(initrd)) { + initrd_size = get_file_size(initrd); + for (auto& m : mems) { + if (initrd_size && (initrd_size + 0x1000) < m.second->size()) { + initrd_end = m.first + m.second->size() - 0x1000; + initrd_start = initrd_end - initrd_size; + read_file_bytes(initrd, 0, m.second->contents() + (initrd_start - m.first), initrd_size); + break; + } + } + } + + sim_t s(isa, priv, varch, nprocs, halted, real_time_clint, + initrd_start, initrd_end, bootargs, start_pc, mems, plugin_devices, htif_args, + std::move(hartids), dm_config, log_path, dtb_enabled, dtb_file); std::unique_ptr remote_bitbang((remote_bitbang_t *) NULL); - std::unique_ptr jtag_dtm(new jtag_dtm_t(&s.debug_module)); + std::unique_ptr jtag_dtm( + new jtag_dtm_t(&s.debug_module, dmi_rti)); if (use_rbb) { remote_bitbang.reset(new remote_bitbang_t(rbb_port, &(*jtag_dtm))); s.set_remote_bitbang(&(*remote_bitbang)); } - s.set_dtb_enabled(dtb_enabled); if (dump_dts) { printf("%s", s.get_dts()); @@ -184,8 +403,16 @@ int main(int argc, char** argv) } s.set_debug(debug); - s.set_log(log); - s.set_ust_trace(ust_file); + s.configure_log(log, log_commits); s.set_histogram(histogram); - return s.run(); + + auto return_code = s.run(); + + for (auto& mem : mems) + delete mem.second; + + for (auto& plugin_device : plugin_devices) + delete plugin_device.second; + + return return_code; } diff --git a/spike_main/spike_main.mk.in 
b/spike_main/spike_main.mk.in index 500446fa6c..35bef398c4 100644 --- a/spike_main/spike_main.mk.in +++ b/spike_main/spike_main.mk.in @@ -1,10 +1,13 @@ spike_main_subproject_deps = \ + fdt \ + fesvr \ softfloat \ + disasm \ riscv \ spike_main_install_prog_srcs = \ spike.cc \ - spike-dasm.cc \ + spike-log-parser.cc \ xspike.cc \ termios-xspike.cc \ From 43fd971c18d4f738047e589f39b53a85eed22b1f Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 26 Jul 2022 17:59:26 +0200 Subject: [PATCH 04/23] Implement p.cnt instruction --- riscv/insns/p_cnt.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 riscv/insns/p_cnt.h diff --git a/riscv/insns/p_cnt.h b/riscv/insns/p_cnt.h new file mode 100644 index 0000000000..f19906934b --- /dev/null +++ b/riscv/insns/p_cnt.h @@ -0,0 +1,8 @@ +reg_t val = zext_xlen(RS1); +reg_t cnt = 0; + +for(cnt = 0; val != 0x00; ++cnt) +{ + val &= val - 1; +} +WRITE_RD(cnt); From e9d29d51759c07423c6e3b8b95b3adbd5fe80dd5 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 26 Jul 2022 18:00:55 +0200 Subject: [PATCH 05/23] Implement all instructions in macrnhi subset The instructions are not yet integrated into the build-procedure. 
--- riscv/decode.h | 7 ++++--- riscv/insns/p_machhsN.h | 8 ++++++++ riscv/insns/p_machhsRN.h | 9 +++++++++ riscv/insns/p_machhuN.h | 10 ++++++++++ riscv/insns/p_machhuRN.h | 11 +++++++++++ riscv/insns/p_macsN.h | 8 ++++++++ riscv/insns/p_macsRN.h | 9 +++++++++ riscv/insns/p_macuN.h | 10 ++++++++++ riscv/insns/p_macuRN.h | 11 +++++++++++ 9 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 riscv/insns/p_machhsN.h create mode 100644 riscv/insns/p_machhsRN.h create mode 100644 riscv/insns/p_machhuN.h create mode 100644 riscv/insns/p_machhuRN.h create mode 100644 riscv/insns/p_macsN.h create mode 100644 riscv/insns/p_macsRN.h create mode 100644 riscv/insns/p_macuN.h create mode 100644 riscv/insns/p_macuRN.h diff --git a/riscv/decode.h b/riscv/decode.h index d6d270af87..374539b93d 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -129,9 +129,10 @@ class insn_t uint64_t v_mew() { return x(28, 1); } // Xpulpimg - uint64_t p_zimm5() { return x(20, 5); } - int64_t p_simm5() { return xs(20, 5); } - uint64_t p_rs3() { return x(7, 5); } + uint64_t p_Luimm5() { return x(25, 5); } // [29:25] + uint64_t p_zimm5() { return x(20, 5); } // [24:20] + int64_t p_simm5() { return xs(20, 5); } // [24:20] + uint64_t p_rs3() { return x(7, 5); } // [11:07] uint64_t p_zimm6() { return x(25,1) + (x(20, 5) << 1); } int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } diff --git a/riscv/insns/p_machhsN.h b/riscv/insns/p_machhsN.h new file mode 100644 index 0000000000..5afc9a0dc2 --- /dev/null +++ b/riscv/insns/p_machhsN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); \ No newline at end of file diff --git a/riscv/insns/p_machhsRN.h b/riscv/insns/p_machhsRN.h new file mode 100644 index 0000000000..7cd965b45b --- /dev/null +++ b/riscv/insns/p_machhsRN.h @@ -0,0 +1,9 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t 
mul2 = sext16(RS2_H(1)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_machhuN.h b/riscv/insns/p_machhuN.h new file mode 100644 index 0000000000..aa1d44d93b --- /dev/null +++ b/riscv/insns/p_machhuN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_machhuRN.h b/riscv/insns/p_machhuRN.h new file mode 100644 index 0000000000..3fb0027056 --- /dev/null +++ b/riscv/insns/p_machhuRN.h @@ -0,0 +1,11 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_macsN.h b/riscv/insns/p_macsN.h new file mode 100644 index 0000000000..e32a4abbe2 --- /dev/null +++ b/riscv/insns/p_macsN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_macsRN.h b/riscv/insns/p_macsRN.h new file mode 100644 index 0000000000..e5dbe68afa --- /dev/null +++ b/riscv/insns/p_macsRN.h @@ -0,0 +1,9 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_macuN.h b/riscv/insns/p_macuN.h new file mode 100644 index 0000000000..60570d7c07 --- /dev/null +++ b/riscv/insns/p_macuN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_macuRN.h b/riscv/insns/p_macuRN.h new file mode 100644 index 0000000000..ed360e8ebd --- /dev/null +++ b/riscv/insns/p_macuRN.h @@ -0,0 +1,11 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. 
+ // And it expects the results to be signed (see eg. slli) \ No newline at end of file From 8cc942391b5b23764b6eca1fc8781665356c0d8c Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 26 Jul 2022 18:05:09 +0200 Subject: [PATCH 06/23] Split Xpulpimg instrs into subset in the makefile It would also be possible to just add all the remaining instructions to the existing variable but splitting it better represents how the extension is organized everywhere else. --- riscv/riscv.mk.in | 183 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 163 insertions(+), 20 deletions(-) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 49d4a821f6..842ab4d8d9 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -757,8 +757,17 @@ riscv_insn_ext_v = \ $(riscv_insn_ext_v_amo) \ $(riscv_insn_ext_v_ctrl) \ $(riscv_insn_ext_v_ldst) \ - -riscv_insn_ext_xpulpimg = \ + + +riscv_insn_ext_pulphwloop = \ + lp_starti \ + lp_endi \ + lp_count \ + lp_counti \ + lp_setup \ + lp_setupi \ + +riscv_insn_ext_pulppostmod = \ p_lb_irpost \ p_lbu_irpost \ p_lh_irpost \ @@ -783,26 +792,84 @@ riscv_insn_ext_xpulpimg = \ p_sb_rr \ p_sh_rr \ p_sw_rr \ + +riscv_insn_ext_pulpabs = \ p_abs \ + +riscv_insn_ext_pulpslet = \ p_slet \ p_sletu \ - p_max \ - p_maxu \ - p_min \ - p_minu \ - p_exths \ - p_exthz \ - p_extbs \ - p_extbz \ - p_clip \ - p_clipu \ - p_clipr \ - p_clipur \ - p_beqimm \ - p_bneimm \ + +riscv_insn_ext_pulpmacsi = \ p_mac \ p_msu \ - pv_add_h \ + +riscv_insn_ext_pulpmulrnhi = \ + p_mulhhs \ + p_mulhhu \ + p_muls \ + p_mulu \ + p_mulsN \ + p_mulsRN \ + p_muluN \ + p_muluRN \ + p_mulhhsN \ + p_mulhhuN \ + p_mulhhsRN \ + p_mulhhuRN \ + +riscv_insn_ext_pulpmacrnhi = \ + p_macsN \ + p_macuN \ + p_macsRN \ + p_macuRN \ + p_machhsN \ + p_machhuN \ + p_machhsRN \ + p_machhuRN \ + +riscv_insn_ext_pulppartmac = \ + p_macs \ + p_macu \ + p_machhs \ + p_machhu \ + +riscv_insn_ext_pulpminmax = \ + p_min \ + p_max \ + p_minu \ + p_maxu \ + +riscv_insn_ext_pulpbitop = \ + p_cnt \ 
+ p_exths \ + p_exthz \ + p_extbs \ + p_extbz \ + +# riscv_insn_ext_pulpbitop = \ +# p_extract \ +# p_extractr \ +# p_extractu \ +# p_extractur \ +# p_insert \ +# p_insertr \ +# p_bset \ +# p_bsetr \ +# p_bclr \ +# p_bclrr \ +# p_cnt \ +# p_clb \ +# p_fl1 \ +# p_ff1 \ +# p_ror \ +# p_exths \ +# p_exthz \ +# p_extbs \ +# p_extbz \ + +riscv_insn_ext_pulpvect = \ + pv_add_h \ pv_add_sc_h \ pv_add_sci_h \ pv_add_b \ @@ -930,8 +997,83 @@ riscv_insn_ext_xpulpimg = \ pv_sdotsp_b \ pv_sdotsp_sc_b \ pv_sdotsp_sci_b \ - pv_shuffle2_h \ - pv_shuffle2_b \ + +riscv_insn_ext_pulpvectshufflepack = \ + pv_shuffle.h \ + pv_shuffle.sci.h \ + pv_shuffle.b \ + pv_shufflei0.sci.b \ + pv_shufflei1.sci.b \ + pv_shufflei2.sci.b \ + pv_shufflei3.sci.b \ + pv_shuffle2.h \ + pv_shuffle2.b \ + pv_pack \ + pv_pack.h \ + pv_packhi.b \ + pv_packlo.b \ + +riscv_insn_ext_pulpclip = \ + p_clip \ + p_clipu \ + p_clipr \ + p_clipur \ + +riscv_insn_ext_pulpaddsubrn = \ + p_addn \ + p_addnr \ + p_addun \ + p_addunr \ + p_addrn \ + p_addrnr \ + p_addurn \ + p_addurnr \ + p_subn \ + p_subnr \ + p_subun \ + p_subunr \ + p_subrn \ + p_subrnr \ + p_suburn \ + p_suburnr \ + +riscv_insn_ext_pulpbr = \ + p_beqimm \ + p_bneimm \ + +riscv_insn_ext_pulpbitrev = \ + p_bitrev \ + +riscv_insn_ext_pulpimg = \ + $(riscv_insn_ext_pulppostmod) \ + $(riscv_insn_ext_pulpabs) \ + $(riscv_insn_ext_pulpslet) \ + $(riscv_insn_ext_pulpmacsi) \ + $(riscv_insn_ext_pulpmacrnhi) \ + $(riscv_insn_ext_pulpminmax) \ + $(riscv_insn_ext_pulpbitop) \ + $(riscv_insn_ext_pulpvect) \ + $(riscv_insn_ext_pulpclip) \ + $(riscv_insn_ext_pulpbr) + +# riscv_insn_ext_pulpimg = \ +# $(riscv_insn_ext_pulphwloop) \ +# $(riscv_insn_ext_pulppostmod) \ +# $(riscv_insn_ext_pulpabs) \ +# $(riscv_insn_ext_pulpslet) \ +# $(riscv_insn_ext_pulpmacsi) \ +# $(riscv_insn_ext_pulpmulrnhi) \ +# $(riscv_insn_ext_pulpmacrnhi) \ +# $(riscv_insn_ext_pulppartmac) \ +# $(riscv_insn_ext_pulpminmax) \ +# $(riscv_insn_ext_pulpbitop) \ +# $(riscv_insn_ext_pulpvect) \ +# 
$(riscv_insn_ext_pulpvectshufflepack) \ +# $(riscv_insn_ext_pulpclip) \ +# $(riscv_insn_ext_pulpaddsubrn) \ +# $(riscv_insn_ext_pulpbr) \ +# $(riscv_insn_ext_pulpbitrev) \ + riscv_insn_ext_h = \ hfence_gvma \ @@ -975,7 +1117,7 @@ riscv_insn_list = \ $(riscv_insn_ext_d) \ $(riscv_insn_ext_zfh) \ $(riscv_insn_ext_q) \ - $(riscv_insn_ext_xpulpimg) \ + $(riscv_insn_ext_pulpimg) \ $(riscv_insn_ext_h) \ $(riscv_insn_priv) \ # $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ @@ -1000,3 +1142,4 @@ $(riscv_gen_srcs): %.cc: insns/%.h insn_template.cc riscv_junk = \ $(riscv_gen_srcs) \ + From 1a417f5717ad84ef2829ec4bc08275e689095fcb Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Thu, 4 Aug 2022 14:27:53 +0200 Subject: [PATCH 07/23] Add disassembly for macrnhi set --- disasm/disasm.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index fbb889775a..405c40a70a 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -493,6 +493,7 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) + #define DEFINE_PR2ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2, &p_simm5}) DEFINE_XLOAD(lb) DEFINE_XLOAD(lbu) @@ -1373,6 +1374,7 @@ disassembler_t::disassembler_t(int xlen) DEFINE_R1TYPE(p_exthz); DEFINE_R1TYPE(p_extbs); DEFINE_R1TYPE(p_extbz); + DEFINE_R1TYPE(p_cnt); DEFINE_PI0TYPE(p_clip); DEFINE_PI0TYPE(p_clipu); DEFINE_RTYPE(p_clipr); @@ -1382,6 +1384,16 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_mac); DEFINE_RTYPE(p_msu); + // xpulpmacrnhi + DEFINE_PR2ITYPE(p_macuN); + DEFINE_PR2ITYPE(p_machhuN); + DEFINE_PR2ITYPE(p_macsN); + DEFINE_PR2ITYPE(p_machhsN); + DEFINE_PR2ITYPE(p_macuRN); + DEFINE_PR2ITYPE(p_machhuRN); + DEFINE_PR2ITYPE(p_macsRN); + 
DEFINE_PR2ITYPE(p_machhsRN); + DEFINE_RTYPE(pv_add_h); DEFINE_RTYPE(pv_add_sc_h); DEFINE_PI1STYPE(pv_add_sci_h); From 44a0ef237f73024562b2a964058c1389287cac45 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Thu, 4 Aug 2022 14:47:01 +0200 Subject: [PATCH 08/23] Implement all instructions in hwloop subset This implementation was only tested against the very simple example in the CV32E40P docs, so it might not always work properly. 1. Implement a hardware-loop unit in processor_t, add the CSRs to state_t (incl set_csr and get_csr handling) 2. Add the hw-loop unit to the cycle-execution loop in execute.cc 3. Add instructions to write to hwloop CSRs Currently not all constraints are checked yet (illegal instrs in body). --- disasm/disasm.cc | 26 ++++++++ riscv/decode.h | 5 +- riscv/execute.cc | 5 ++ riscv/insns/lp_count.h | 4 ++ riscv/insns/lp_counti.h | 4 ++ riscv/insns/lp_endi.h | 7 +++ riscv/insns/lp_setup.h | 9 +++ riscv/insns/lp_setupi.h | 9 +++ riscv/insns/lp_starti.h | 6 ++ riscv/processor.cc | 132 ++++++++++++++++++++++++++++++++++++++++ riscv/processor.h | 44 ++++++++++++++ riscv/riscv.mk.in | 1 + 12 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 riscv/insns/lp_count.h create mode 100644 riscv/insns/lp_counti.h create mode 100644 riscv/insns/lp_endi.h create mode 100644 riscv/insns/lp_setup.h create mode 100644 riscv/insns/lp_setupi.h create mode 100644 riscv/insns/lp_starti.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 405c40a70a..0ad45836c9 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -355,6 +355,25 @@ struct : public arg_t { // Xpulpimg +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_uimmL()); + } +} p_uimmL; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_uimmS()); + } +} p_uimmS; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return 
std::to_string((uint32_t)insn.p_loop()); + } +} p_loop; + + struct : public arg_t { std::string to_string(insn_t insn) const { return std::to_string((uint32_t)insn.p_zimm5()); @@ -1394,6 +1413,13 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PR2ITYPE(p_macsRN); DEFINE_PR2ITYPE(p_machhsRN); + // xpulphwloop + DISASM_INSN("lp_starti", lp_starti, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_endi", lp_endi, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_count", lp_count, 0, {&p_loop, &xrs1}); + DISASM_INSN("lp_setup", lp_setup, 0, {&p_loop, &xrs1, &p_uimmL}); + DISASM_INSN("lp_setupi", lp_setupi, 0, {&p_loop, &p_uimmL, &p_uimmS}); + DEFINE_RTYPE(pv_add_h); DEFINE_RTYPE(pv_add_sc_h); DEFINE_PI1STYPE(pv_add_sci_h); diff --git a/riscv/decode.h b/riscv/decode.h index 374539b93d..45640736b2 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -129,10 +129,13 @@ class insn_t uint64_t v_mew() { return x(28, 1); } // Xpulpimg + uint64_t p_uimmL() { return x(20, 12); } // [31:20] same bits as I-type imm12 + uint64_t p_uimmS() { return x(15, 5); } // [19:15] same bits as RS1 + uint64_t p_loop() { return x(7, 1); } // [7] called 'L' in the docs uint64_t p_Luimm5() { return x(25, 5); } // [29:25] uint64_t p_zimm5() { return x(20, 5); } // [24:20] int64_t p_simm5() { return xs(20, 5); } // [24:20] - uint64_t p_rs3() { return x(7, 5); } // [11:07] + uint64_t p_rs3() { return x(7, 5); } // [11:07] alias for RD uint64_t p_zimm6() { return x(25,1) + (x(20, 5) << 1); } int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } diff --git a/riscv/execute.cc b/riscv/execute.cc index 7313c512e9..84c6d91071 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -273,6 +273,7 @@ void processor_t::step(size_t n) if (debug && !state.serialized) disasm(fetch.insn); pc = execute_insn(this, pc, fetch); + pc = this->hwLoops.handle_loops(state.pc, pc, fetch.insn); advance_pc(); } } @@ -305,9 +306,13 @@ void processor_t::step(size_t n) // This macro is included in "icache.h" included within the switch 
// statement below. The indirect jump corresponding to the instruction // is located within the execute_insn() function call. + + // Todo: Is it a good idea to add hwloops here or is forcing slow-path better? + // trade-off between speed of hwloops and speed of everything else #define ICACHE_ACCESS(i) { \ insn_fetch_t fetch = ic_entry->data; \ pc = execute_insn(this, pc, fetch); \ + pc = this->hwLoops.handle_loops(state.pc, pc, fetch.insn); \ ic_entry = ic_entry->next; \ if (i == mmu_t::ICACHE_ENTRIES-1) break; \ if (unlikely(ic_entry->tag != pc)) break; \ diff --git a/riscv/insns/lp_count.h b/riscv/insns/lp_count.h new file mode 100644 index 0000000000..99c8099882 --- /dev/null +++ b/riscv/insns/lp_count.h @@ -0,0 +1,4 @@ +reg_t num_iter = zext_xlen(RS1); +bool loopNr = insn.p_loop(); + +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_counti.h b/riscv/insns/lp_counti.h new file mode 100644 index 0000000000..eb18d69b1f --- /dev/null +++ b/riscv/insns/lp_counti.h @@ -0,0 +1,4 @@ +reg_t num_iter = insn.p_uimmL(); +bool loopNr = insn.p_loop(); + +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_endi.h b/riscv/insns/lp_endi.h new file mode 100644 index 0000000000..349777c30f --- /dev/null +++ b/riscv/insns/lp_endi.h @@ -0,0 +1,7 @@ +reg_t offset = insn.p_uimmL() << 1; +bool loopNr = insn.p_loop(); + +reg_t end_addr = pc + offset; + +p->hwLoops.set_end(loopNr, end_addr); + diff --git a/riscv/insns/lp_setup.h b/riscv/insns/lp_setup.h new file mode 100644 index 0000000000..423e41d429 --- /dev/null +++ b/riscv/insns/lp_setup.h @@ -0,0 +1,9 @@ +reg_t num_iter = zext_xlen(RS1); +bool loopNr = insn.p_loop(); + +reg_t start_addr = npc; // next pc (pc+4) +reg_t end_addr = pc + (insn.p_uimmS() << 1); + +p->hwLoops.set_end(loopNr, end_addr); +p->hwLoops.set_start(loopNr, start_addr); +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_setupi.h b/riscv/insns/lp_setupi.h new file mode 100644 index 0000000000..9978fa68c9 --- 
/dev/null +++ b/riscv/insns/lp_setupi.h @@ -0,0 +1,9 @@ +reg_t num_iter = insn.p_uimmL(); +bool loopNr = insn.p_loop(); + +reg_t start_addr = npc; // next pc (pc+4) +reg_t end_addr = pc + (insn.p_uimmS() << 1); + +p->hwLoops.set_end(loopNr, end_addr); +p->hwLoops.set_start(loopNr, start_addr); +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_starti.h b/riscv/insns/lp_starti.h new file mode 100644 index 0000000000..4ef2db1fb2 --- /dev/null +++ b/riscv/insns/lp_starti.h @@ -0,0 +1,6 @@ +reg_t offset = insn.p_uimmL() << 1; +bool loopNr = insn.p_loop(); + +reg_t start_addr = pc + offset; + +p->hwLoops.set_start(loopNr, start_addr); diff --git a/riscv/processor.cc b/riscv/processor.cc index 84be372a9a..a3b607fd1c 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -31,6 +31,11 @@ processor_t::processor_t(const char* isa, const char* priv, const char* varch, { VU.p = this; + // Todo: check for subset, not instruction in subset + #ifdef MATCH_LP_SETUPI + hwLoops.p = this; + #endif + parse_isa_string(isa); parse_priv_string(priv); parse_varch_string(varch); @@ -1219,6 +1224,25 @@ void processor_t::set_csr(int which, reg_t val) dirty_vs_state; VU.vxrm = val & 0x3ul; break; + // xpulphwloop + case CSR_LPSTART0: + hwLoops.set_start(0, val); + break; + case CSR_LPEND0: + hwLoops.set_end(0, val); + break; + case CSR_LPCOUNT0: + hwLoops.set_count(0, val); + break; + case CSR_LPSTART1: + hwLoops.set_start(1, val); + break; + case CSR_LPEND1: + hwLoops.set_end(1, val); + break; + case CSR_LPCOUNT1: + hwLoops.set_count(1, val); + break; } #if defined(RISCV_ENABLE_COMMITLOG) @@ -1640,6 +1664,25 @@ reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek) if (!supports_extension('V')) break; ret(VU.vlenb); + // xpulphwloop + case CSR_LPSTART0: + ret(state.lpstart0); + break; + case CSR_LPEND0: + ret(state.lpend0); + break; + case CSR_LPCOUNT0: + ret(state.lpcount0); + break; + case CSR_LPSTART1: + ret(state.lpstart1); + break; + case 
CSR_LPEND1: + ret(state.lpend1); + break; + case CSR_LPCOUNT1: + ret(state.lpcount1); + break; } #undef ret @@ -1813,3 +1856,92 @@ void processor_t::trigger_updated() } } } + + +// PULP HW-Loop extension (xpulphwloop) + +// sets activate flag for loops and overall unit +void processor_t::hwLoopUnit_t::set_active(int i) +{ + bool valid_body = get_start(i) < get_end(i); + lp_active[i] = valid_body && (get_count(i) > 0); + any_active |= lp_active[i]; + + // Constraint: loop body must be at least 3 instructions + if(lp_active[i] && (get_start(i) +8 > get_end(i))) { + throw trap_illegal_instruction(0); + } +} + +// CSR write methods with the side-effects (set activate, check constraints) +void processor_t::hwLoopUnit_t::set_start(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpstart1 = val; + } else { + p->state.lpstart0 = val; + } + set_active(loopNr); +} + +void processor_t::hwLoopUnit_t::set_end(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpend1 = val; + } else { + p->state.lpend0 = val; + } + // Constraint: outer-end must be at least 2 instructions after inner-end + if(p->state.lpend0 +8 > p->state.lpend1) { + throw trap_illegal_instruction(0); + } + set_active(loopNr); +} + +void processor_t::hwLoopUnit_t::set_count(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpcount1 = val; + } else { + p->state.lpcount0 = val; + } + set_active(loopNr); +} + +// Executed after execution of instruction +// pc: current pc (state.pc) +// npc: next pc (from instruction) +// returns next pc (can be modifed due to hw-loop) +reg_t processor_t::hwLoopUnit_t::handle_loops(reg_t pc, reg_t npc, insn_t insn) +{ + // immediately break if inactive as to not degrade performance + if(!any_active) { + return npc; + } + + for(int i=0; i<2; i++) { + // active and in body + if(lp_active[i] && get_start(i) <= pc && pc <= get_end(i)) { + // Constraints: if not met -> throw trap_illegal_instruction(insn.bits()) + // no compressed instructions + if(insn.length() < 4) { + throw 
trap_illegal_instruction(insn.bits()); + } + // Todo: Finish constraint checks + // no unconditional jumps + // no conditional branching + // no priviliged instructions except ebreak + // no memory ordering (fence) instr + + if(pc == get_end(i)) { + reg_t remaining = get_count(i) -1; + set_count(i, remaining); + lp_active[i] &= (remaining > 0); + any_active = lp_active[0] || lp_active[1]; + + if(lp_active[i]) { + return get_start(i); + } + } + } + } + + return npc; +} diff --git a/riscv/processor.h b/riscv/processor.h index 87df69f593..70f54bed71 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -213,6 +213,15 @@ struct state_t uint32_t fflags; uint32_t frm; + + // xpulphwloop + reg_t lpstart0; + reg_t lpend0; + reg_t lpcount0; + reg_t lpstart1; + reg_t lpend1; + reg_t lpcount1; + bool serialized; // whether timer CSRs are in a well-defined state // When true, execute a single instruction and then enter debug mode. This @@ -525,6 +534,41 @@ class processor_t : public abstract_device_t }; vectorUnit_t VU; + + + // PULP HW-Loop extension (xpulphwloop) + class hwLoopUnit_t { + public: + processor_t* p; + + bool lp_active[2]; + bool any_active; // set if any group of count, start and end are valid + + hwLoopUnit_t() : + p(0) { + any_active = false; + } + + void set_active(int i); // handles exceptions and sets active-flags + + reg_t handle_loops(reg_t pc, reg_t npc, insn_t insn); // returns npc + + // Control and Status Register access (callable by instructions) + // It woul also be possible to use p->get_csr here but since + // hwloop doesn't have side effect this is fine and faster + reg_t get_start(int lpNr) { return (lpNr) ? p->state.lpstart1 : p->state.lpstart0; } + reg_t get_end(int lpNr) { return (lpNr) ? p->state.lpend1 : p->state.lpend0; } + reg_t get_count(int lpNr) { return (lpNr) ? 
p->state.lpcount1 : p->state.lpcount0; } + + // also used in set_csr() to make sure csr-write commands also + // check for constraints and throw exception + // setters take insn to throw illegal_instruction + void set_start(int loopNr, reg_t val); + void set_end(int loopNr, reg_t val); + void set_count(int loopNr, reg_t val); + }; + hwLoopUnit_t hwLoops; + }; reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 842ab4d8d9..0d8ae891f7 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1046,6 +1046,7 @@ riscv_insn_ext_pulpbitrev = \ riscv_insn_ext_pulpimg = \ $(riscv_insn_ext_pulppostmod) \ + $(riscv_insn_ext_pulphwloop) \ $(riscv_insn_ext_pulpabs) \ $(riscv_insn_ext_pulpslet) \ $(riscv_insn_ext_pulpmacsi) \ From 3ec9a9e7745a72dbb75d57b4e3f2b2f56759c2a5 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 5 Aug 2022 13:16:32 +0200 Subject: [PATCH 09/23] Fix signedness issue in hwloop instructions --- riscv/insns/lp_counti.h | 2 +- riscv/insns/lp_endi.h | 3 +-- riscv/insns/lp_setup.h | 3 ++- riscv/insns/lp_setupi.h | 5 +++-- riscv/insns/lp_starti.h | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/riscv/insns/lp_counti.h b/riscv/insns/lp_counti.h index eb18d69b1f..6c78ceaa89 100644 --- a/riscv/insns/lp_counti.h +++ b/riscv/insns/lp_counti.h @@ -1,4 +1,4 @@ -reg_t num_iter = insn.p_uimmL(); +reg_t num_iter = zext_xlen(insn.p_uimmL()); bool loopNr = insn.p_loop(); p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_endi.h b/riscv/insns/lp_endi.h index 349777c30f..74d155f95d 100644 --- a/riscv/insns/lp_endi.h +++ b/riscv/insns/lp_endi.h @@ -1,7 +1,6 @@ -reg_t offset = insn.p_uimmL() << 1; +reg_t offset = zext_xlen(insn.p_uimmL() << 1); bool loopNr = insn.p_loop(); reg_t end_addr = pc + offset; p->hwLoops.set_end(loopNr, end_addr); - diff --git a/riscv/insns/lp_setup.h b/riscv/insns/lp_setup.h index 423e41d429..2c29a1f4ca 100644 --- 
a/riscv/insns/lp_setup.h +++ b/riscv/insns/lp_setup.h @@ -1,8 +1,9 @@ reg_t num_iter = zext_xlen(RS1); +reg_t offset = zext_xlen(insn.p_uimmL() << 1); bool loopNr = insn.p_loop(); reg_t start_addr = npc; // next pc (pc+4) -reg_t end_addr = pc + (insn.p_uimmS() << 1); +reg_t end_addr = pc + offset; p->hwLoops.set_end(loopNr, end_addr); p->hwLoops.set_start(loopNr, start_addr); diff --git a/riscv/insns/lp_setupi.h b/riscv/insns/lp_setupi.h index 9978fa68c9..f824b7bcf4 100644 --- a/riscv/insns/lp_setupi.h +++ b/riscv/insns/lp_setupi.h @@ -1,8 +1,9 @@ -reg_t num_iter = insn.p_uimmL(); +reg_t num_iter = zext_xlen(insn.p_uimmL()); +reg_t offset = zext_xlen(insn.p_uimmS() << 1); bool loopNr = insn.p_loop(); reg_t start_addr = npc; // next pc (pc+4) -reg_t end_addr = pc + (insn.p_uimmS() << 1); +reg_t end_addr = pc + offset; p->hwLoops.set_end(loopNr, end_addr); p->hwLoops.set_start(loopNr, start_addr); diff --git a/riscv/insns/lp_starti.h b/riscv/insns/lp_starti.h index 4ef2db1fb2..09b3acd96f 100644 --- a/riscv/insns/lp_starti.h +++ b/riscv/insns/lp_starti.h @@ -1,4 +1,4 @@ -reg_t offset = insn.p_uimmL() << 1; +reg_t offset = zext_xlen(insn.p_uimmL() << 1); bool loopNr = insn.p_loop(); reg_t start_addr = pc + offset; From faf17581fcb3c8d0247573e73c2698b96278b141 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 5 Aug 2022 13:37:34 +0200 Subject: [PATCH 10/23] Add missing disassembly for lp.counti --- disasm/disasm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 0ad45836c9..eaa296682b 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1416,6 +1416,7 @@ disassembler_t::disassembler_t(int xlen) // xpulphwloop DISASM_INSN("lp_starti", lp_starti, 0, {&p_loop, &p_uimmL}); DISASM_INSN("lp_endi", lp_endi, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_counti", lp_counti, 0, {&p_loop, &p_uimmL}); DISASM_INSN("lp_count", lp_count, 0, {&p_loop, &xrs1}); DISASM_INSN("lp_setup", lp_setup, 0, {&p_loop, &xrs1, &p_uimmL}); 
DISASM_INSN("lp_setupi", lp_setupi, 0, {&p_loop, &p_uimmL, &p_uimmS}); From e0727685ae72e9ff29473e37f5748dd64afcdcc2 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 5 Aug 2022 16:03:58 +0200 Subject: [PATCH 11/23] Remove ifdef guards for hwloop The guards are not consistently used across all relevant code. This might be a project for the future, a better guard condition is also needed (directly conditioned on the presence of the extension, not on an instruction in it). --- riscv/processor.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/riscv/processor.cc b/riscv/processor.cc index a3b607fd1c..b601f1fbf7 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -31,10 +31,7 @@ processor_t::processor_t(const char* isa, const char* priv, const char* varch, { VU.p = this; - // Todo: check for subset, not instruction in subset - #ifdef MATCH_LP_SETUPI - hwLoops.p = this; - #endif + hwLoops.p = this; parse_isa_string(isa); parse_priv_string(priv); From a0b9951ee7a62fa994f8d0347fc767654192cd78 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 8 Aug 2022 12:23:29 +0200 Subject: [PATCH 12/23] Fix vector signed/zero-extend macros Now you can pass expressions without violating order-of-operation. 
--- riscv/decode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index 45640736b2..f28f592488 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -404,8 +404,8 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); #define e512 512 // 512b elements #define e1024 1024 // 1024b elements -#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew)) -#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew)) +#define vsext(x, sew) ( ((sreg_t)(x) << (64-(sew))) >> (64-(sew)) ) +#define vzext(x, sew) ( ((reg_t)(x) << (64-(sew))) >> (64-(sew)) ) #define DEBUG_RVV 0 From 32e642aa0834a3f609c7b1a20a3114ee4c644ea8 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 8 Aug 2022 12:32:52 +0200 Subject: [PATCH 13/23] Implement all instructions in bitop subset - Add disassembly format - Add signed/zero-extend and extract macro to easily access a range of bits in a register - Implement all instructions (if possible using gcc builtins) - Add set properly to make This implementation is tested against automatically generated tests. 
--- disasm/disasm.cc | 28 +++++++++++++++++++++++----- riscv/decode.h | 3 +++ riscv/insns/p_bclr.h | 8 ++++++++ riscv/insns/p_bclrr.h | 8 ++++++++ riscv/insns/p_bset.h | 8 ++++++++ riscv/insns/p_bsetr.h | 8 ++++++++ riscv/insns/p_clb.h | 34 ++++++++++++++++++++++++++++++++++ riscv/insns/p_cnt.h | 15 ++++++++++----- riscv/insns/p_extract.h | 8 ++++++++ riscv/insns/p_extractr.h | 8 ++++++++ riscv/insns/p_extractu.h | 8 ++++++++ riscv/insns/p_extractur.h | 8 ++++++++ riscv/insns/p_ff1.h | 23 +++++++++++++++++++++++ riscv/insns/p_fl1.h | 23 +++++++++++++++++++++++ riscv/insns/p_insert.h | 19 +++++++++++++++++++ riscv/insns/p_insertr.h | 19 +++++++++++++++++++ riscv/insns/p_ror.h | 6 ++++++ riscv/riscv.mk.in | 38 +++++++++++++++++--------------------- 18 files changed, 241 insertions(+), 31 deletions(-) create mode 100644 riscv/insns/p_bclr.h create mode 100644 riscv/insns/p_bclrr.h create mode 100644 riscv/insns/p_bset.h create mode 100644 riscv/insns/p_bsetr.h create mode 100644 riscv/insns/p_clb.h create mode 100644 riscv/insns/p_extract.h create mode 100644 riscv/insns/p_extractr.h create mode 100644 riscv/insns/p_extractu.h create mode 100644 riscv/insns/p_extractur.h create mode 100644 riscv/insns/p_ff1.h create mode 100644 riscv/insns/p_fl1.h create mode 100644 riscv/insns/p_insert.h create mode 100644 riscv/insns/p_insertr.h create mode 100644 riscv/insns/p_ror.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index eaa296682b..3fb768ccec 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -511,6 +511,7 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) + #define DEFINE_PI2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5, &p_zimm5}) #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, 
&p_simm5, &branch_target}) #define DEFINE_PR2ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2, &p_simm5}) @@ -1389,11 +1390,6 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_minu); DEFINE_RTYPE(p_max); DEFINE_RTYPE(p_maxu); - DEFINE_R1TYPE(p_exths); - DEFINE_R1TYPE(p_exthz); - DEFINE_R1TYPE(p_extbs); - DEFINE_R1TYPE(p_extbz); - DEFINE_R1TYPE(p_cnt); DEFINE_PI0TYPE(p_clip); DEFINE_PI0TYPE(p_clipu); DEFINE_RTYPE(p_clipr); @@ -1403,6 +1399,28 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_mac); DEFINE_RTYPE(p_msu); + // xpulpbitop + DEFINE_PI2TYPE(p_extract); + DEFINE_RTYPE(p_extractr); + DEFINE_PI2TYPE(p_extractu); + DEFINE_RTYPE(p_extractur); + DEFINE_PI2TYPE(p_insert); + DEFINE_RTYPE(p_insertr); + DEFINE_PI2TYPE(p_bset); + DEFINE_RTYPE(p_bsetr); + DEFINE_PI2TYPE(p_bclr); + DEFINE_RTYPE(p_bclrr); + // xpulpbitopsmall (subset of xpulpbitop) + DEFINE_R1TYPE(p_exths); + DEFINE_R1TYPE(p_exthz); + DEFINE_R1TYPE(p_extbs); + DEFINE_R1TYPE(p_extbz); + DEFINE_RTYPE(p_ror); + DEFINE_R1TYPE(p_ff1); + DEFINE_R1TYPE(p_fl1); + DEFINE_R1TYPE(p_clb); + DEFINE_R1TYPE(p_cnt); + // xpulpmacrnhi DEFINE_PR2ITYPE(p_macuN); DEFINE_PR2ITYPE(p_machhuN); diff --git a/riscv/decode.h b/riscv/decode.h index f28f592488..42100503a5 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -292,6 +292,9 @@ class regfile_t #define sext8(x) ((sreg_t)(int8_t)(x)) #define zext8(x) ((reg_t)(uint8_t)(x)) +#define sextr(x, u, l) ( (sreg_t)( ((sreg_t)x) << (63-(u)) >> ((l)+63-(u)) ) ) // sext(x[u:l]) +#define zextr(x, u, l) ((reg_t)( ((x) >> l) & ( (1 << ((u)-(l)+1))-1 ) )) // zext(x[u:l]) + #define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ #define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) diff --git a/riscv/insns/p_bclr.h b/riscv/insns/p_bclr.h new file mode 100644 index 0000000000..b8244e6fc7 --- /dev/null +++ b/riscv/insns/p_bclr.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = 
insn.p_Luimm5(); + +reg_t bit_mask = ( (((reg_t)1< rd = 0, not 32 + } +#elif + if(val == 0) { + cnt = 0; // rs1 = 0 -> rd = 0, not 32 + } else { + if(val > 0) { + // turn leading 0s into leading 1s + val = ~val; + } + + // log2() from standfords bithacks (find highest '1') + while (val >>= 1) + { + cnt++; + } + } + +#endif + +std::cout << "bin: " << std::hex << (__builtin_clrsb(val)) << std::endl; +std::cout << "val: " << std::hex << val << std::endl; +std::cout << "cnt: " << cnt << std::endl; + + +WRITE_RD(cnt); diff --git a/riscv/insns/p_cnt.h b/riscv/insns/p_cnt.h index f19906934b..e3b97e0e1f 100644 --- a/riscv/insns/p_cnt.h +++ b/riscv/insns/p_cnt.h @@ -1,8 +1,13 @@ reg_t val = zext_xlen(RS1); -reg_t cnt = 0; -for(cnt = 0; val != 0x00; ++cnt) -{ - val &= val - 1; -} +#ifdef __GNUC__ + // "Returns the number of 1-bits in x." + reg_t cnt = __builtin_popcount(val); +#elif + reg_t cnt = 0; + for(cnt = 0; val != 0x00; ++cnt) + { + val &= val - 1; + } +#endif WRITE_RD(cnt); diff --git a/riscv/insns/p_extract.h b/riscv/insns/p_extract.h new file mode 100644 index 0000000000..c45ffde0a8 --- /dev/null +++ b/riscv/insns/p_extract.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +sreg_t res = val >> first; +res = vsext(res, upto+1); + +WRITE_RD(res); diff --git a/riscv/insns/p_extractr.h b/riscv/insns/p_extractr.h new file mode 100644 index 0000000000..db131ee423 --- /dev/null +++ b/riscv/insns/p_extractr.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = zextr(RS2, 4, 0); // rs1[4:0] +reg_t upto = zextr(RS2, 9, 5); // rs1[9:5] + +sreg_t res = val >> first; +res = vsext(res, upto+1); + +WRITE_RD(res); diff --git a/riscv/insns/p_extractu.h b/riscv/insns/p_extractu.h new file mode 100644 index 0000000000..60b5f78870 --- /dev/null +++ b/riscv/insns/p_extractu.h @@ -0,0 +1,8 @@ +sreg_t val = zext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +sreg_t res = val >> first; +res 
= vzext(res, upto+1); + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_extractur.h b/riscv/insns/p_extractur.h new file mode 100644 index 0000000000..eb30730b7c --- /dev/null +++ b/riscv/insns/p_extractur.h @@ -0,0 +1,8 @@ +sreg_t val = zext_xlen(RS1); +reg_t first = zextr(RS2, 4, 0); // rs1[4:0] +reg_t upto = zextr(RS2, 9, 5); // rs1[9:5] + +sreg_t res = val >> first; +res = vzext(res, upto+1); + +WRITE_RD(sext_xlen(res)); \ No newline at end of file diff --git a/riscv/insns/p_ff1.h b/riscv/insns/p_ff1.h new file mode 100644 index 0000000000..c4777432fd --- /dev/null +++ b/riscv/insns/p_ff1.h @@ -0,0 +1,23 @@ +reg_t val = zext_xlen(RS1); +reg_t cnt; + +#ifdef __GNUC__ + // "Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero. " + cnt = __builtin_ffs(val); + if(cnt == 0) { + cnt = 32; // rs = 0 -> rd = 32 + } else { + cnt--; + } +#elif + cnt = 32; + val &= -signed(val); + if (val) c--; + if (val & 0x0000FFFF) cnt -= 16; + if (val & 0x00FF00FF) cnt -= 8; + if (val & 0x0F0F0F0F) cnt -= 4; + if (val & 0x33333333) cnt -= 2; + if (val & 0x55555555) cnt -= 1; +#endif + +WRITE_RD(cnt); diff --git a/riscv/insns/p_fl1.h b/riscv/insns/p_fl1.h new file mode 100644 index 0000000000..7250991f49 --- /dev/null +++ b/riscv/insns/p_fl1.h @@ -0,0 +1,23 @@ +reg_t val = zext_xlen(RS1);; +reg_t cnt; + +#ifdef __GNUC__ + if(val) { + // "Returns the number of leading 0-bits in x" x=0 -> undef + cnt = 31 - __builtin_clz(val); + } else { + cnt = 32; + } +#elif + if(val == 0) { + cnt = 32; // rs1 = 0 -> rd = 0, not 32 + } else { + // log2() from standfords bithacks (find highest '1') + while (val >>= 1) + { + cnt++; + } + } +#endif + +WRITE_RD(cnt); diff --git a/riscv/insns/p_insert.h b/riscv/insns/p_insert.h new file mode 100644 index 0000000000..7989718553 --- /dev/null +++ b/riscv/insns/p_insert.h @@ -0,0 +1,19 @@ +sreg_t res = RD; +sreg_t val = zext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +int offset = 
first+upto-31; +if(offset < 0) { + offset = 0; +} + +reg_t bit_mask = ( (((reg_t)1<> rot) | (((val << 32) >> rot)); + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 0d8ae891f7..2da70598fc 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -840,33 +840,29 @@ riscv_insn_ext_pulpminmax = \ p_minu \ p_maxu \ -riscv_insn_ext_pulpbitop = \ +riscv_insn_ext_pulpbitopsmall = \ p_cnt \ + p_clb \ + p_fl1 \ + p_ff1 \ + p_ror \ p_exths \ p_exthz \ p_extbs \ p_extbz \ -# riscv_insn_ext_pulpbitop = \ -# p_extract \ -# p_extractr \ -# p_extractu \ -# p_extractur \ -# p_insert \ -# p_insertr \ -# p_bset \ -# p_bsetr \ -# p_bclr \ -# p_bclrr \ -# p_cnt \ -# p_clb \ -# p_fl1 \ -# p_ff1 \ -# p_ror \ -# p_exths \ -# p_exthz \ -# p_extbs \ -# p_extbz \ +riscv_insn_ext_pulpbitop = \ + $(riscv_insn_ext_pulpbitopsmall) \ + p_extract \ + p_extractr \ + p_extractu \ + p_extractur \ + p_insert \ + p_insertr \ + p_bset \ + p_bsetr \ + p_bclr \ + p_bclrr \ riscv_insn_ext_pulpvect = \ pv_add_h \ From 1d143a558f58b96383a5ab2f9458bb007669c984 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 8 Aug 2022 13:03:42 +0200 Subject: [PATCH 14/23] Remove debug output from p.clb (bitop instr) I removed it everywhere else but didn't catch this one. 
--- riscv/insns/p_clb.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/riscv/insns/p_clb.h b/riscv/insns/p_clb.h index bf5672d1b6..b53d0dc5b9 100644 --- a/riscv/insns/p_clb.h +++ b/riscv/insns/p_clb.h @@ -26,9 +26,4 @@ reg_t cnt; #endif -std::cout << "bin: " << std::hex << (__builtin_clrsb(val)) << std::endl; -std::cout << "val: " << std::hex << val << std::endl; -std::cout << "cnt: " << cnt << std::endl; - - WRITE_RD(cnt); From 59260085124f3807ddf5d4a2c7fffe36cc0d612f Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 8 Aug 2022 18:04:29 +0200 Subject: [PATCH 15/23] Implement bitrev subset/instruction I can't run the test (can't compile it since xpulpbitrev is not an extension in gcc), therefore this implementation is experimental. I did however compile the exact same code separately and run it against the examples from the docs, so if it fails it is likely in an edge-case. --- disasm/disasm.cc | 3 +++ riscv/insns/p_bitrev.h | 43 ++++++++++++++++++++++++++++++++++++++++++ riscv/riscv.mk.in | 1 + 3 files changed, 47 insertions(+) create mode 100644 riscv/insns/p_bitrev.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 3fb768ccec..09a2fb2f27 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1421,6 +1421,9 @@ disassembler_t::disassembler_t(int xlen) DEFINE_R1TYPE(p_clb); DEFINE_R1TYPE(p_cnt); + // xpulpbitrev + DEFINE_PI2TYPE(p_bitrev); + // xpulpmacrnhi DEFINE_PR2ITYPE(p_macuN); DEFINE_PR2ITYPE(p_machhuN); diff --git a/riscv/insns/p_bitrev.h b/riscv/insns/p_bitrev.h new file mode 100644 index 0000000000..7eae6c07ee --- /dev/null +++ b/riscv/insns/p_bitrev.h @@ -0,0 +1,43 @@ +reg_t val = zext_xlen(RS1); +reg_t shift = insn.p_zimm5(); +reg_t group = insn.p_Luimm5(); +reg_t res; + +// using method from: +// https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64BitsDiv +// with 6bits per block, padded to 8bit spacing +// groups of 2 and 3 bits could be done faster (12bit blocks) + +const reg_t mult_masks[3] = {0x208208, 
0x1001, 0x2008}; +const reg_t and_masks[3] = {0x2240910, 0x3300C, 0x70038}; + +val <<= shift; + +// handle two LSB bits +switch(group) +{ + case 0: // res[1:0] = val[0:1] + res = (val>>1)%2; + res |= (val<<1)%4; + break; + case 1: // res[1:0] = val[1:0] + res = val%4; + break; + default: + res = 0; + break; +} +val >>=2; // remove LSBs from val (prep first block) + + +reg_t temp; +for(int i=0; i<5; i++) +{ + res <<=6; // shift finished part to safety + temp = (val & 0x3F) * mult_masks[group]; // create copies + temp = temp & and_masks[group]; // select bits from copies + res |= (temp % 255); // collapse selected bits together + val >>=6; // prep next block +} + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 2da70598fc..8f09b558ed 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1048,6 +1048,7 @@ riscv_insn_ext_pulpimg = \ $(riscv_insn_ext_pulpmacsi) \ $(riscv_insn_ext_pulpmacrnhi) \ $(riscv_insn_ext_pulpminmax) \ + $(riscv_insn_ext_pulpbitrev) \ $(riscv_insn_ext_pulpbitop) \ $(riscv_insn_ext_pulpvect) \ $(riscv_insn_ext_pulpclip) \ From 6bb6ce75a83806be88991e5331c8e488fbc65ff9 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 9 Aug 2022 10:24:30 +0200 Subject: [PATCH 16/23] Implement all instructions in macrnhi subset - Add disassembly format - Implement all instructions - Add it to make This implementation is tested against generated tests. 
--- disasm/disasm.cc | 10 ++++++++++ riscv/insns/p_mulhhsN.h | 7 +++++++ riscv/insns/p_mulhhsRN.h | 8 ++++++++ riscv/insns/p_mulhhuN.h | 9 +++++++++ riscv/insns/p_mulhhuRN.h | 10 ++++++++++ riscv/insns/p_mulsN.h | 7 +++++++ riscv/insns/p_mulsRN.h | 8 ++++++++ riscv/insns/p_muluN.h | 9 +++++++++ riscv/insns/p_muluRN.h | 10 ++++++++++ riscv/riscv.mk.in | 5 +---- 10 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 riscv/insns/p_mulhhsN.h create mode 100644 riscv/insns/p_mulhhsRN.h create mode 100644 riscv/insns/p_mulhhuN.h create mode 100644 riscv/insns/p_mulhhuRN.h create mode 100644 riscv/insns/p_mulsN.h create mode 100644 riscv/insns/p_mulsRN.h create mode 100644 riscv/insns/p_muluN.h create mode 100644 riscv/insns/p_muluRN.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 09a2fb2f27..71b1645e05 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1424,6 +1424,16 @@ disassembler_t::disassembler_t(int xlen) // xpulpbitrev DEFINE_PI2TYPE(p_bitrev); + // xpulpmulrnhi + DEFINE_PR2ITYPE(p_muluN); + DEFINE_PR2ITYPE(p_mulhhuN); + DEFINE_PR2ITYPE(p_mulsN); + DEFINE_PR2ITYPE(p_mulhhsN); + DEFINE_PR2ITYPE(p_muluRN); + DEFINE_PR2ITYPE(p_mulhhuRN); + DEFINE_PR2ITYPE(p_mulsRN); + DEFINE_PR2ITYPE(p_mulhhsRN); + // xpulpmacrnhi DEFINE_PR2ITYPE(p_macuN); DEFINE_PR2ITYPE(p_machhuN); diff --git a/riscv/insns/p_mulhhsN.h b/riscv/insns/p_mulhhsN.h new file mode 100644 index 0000000000..e7cb59b6d1 --- /dev/null +++ b/riscv/insns/p_mulhhsN.h @@ -0,0 +1,7 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +int norm = insn.p_Luimm5(); + +sreg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); \ No newline at end of file diff --git a/riscv/insns/p_mulhhsRN.h b/riscv/insns/p_mulhhsRN.h new file mode 100644 index 0000000000..a5ca521559 --- /dev/null +++ b/riscv/insns/p_mulhhsRN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for 
rounding + +sreg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_mulhhuN.h b/riscv/insns/p_mulhhuN.h new file mode 100644 index 0000000000..5c1a4b38fa --- /dev/null +++ b/riscv/insns/p_mulhhuN.h @@ -0,0 +1,9 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +int norm = insn.p_Luimm5(); + +reg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_mulhhuRN.h b/riscv/insns/p_mulhhuRN.h new file mode 100644 index 0000000000..c51bfbcf0e --- /dev/null +++ b/riscv/insns/p_mulhhuRN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_mulsN.h b/riscv/insns/p_mulsN.h new file mode 100644 index 0000000000..f1cbb96194 --- /dev/null +++ b/riscv/insns/p_mulsN.h @@ -0,0 +1,7 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +int norm = insn.p_Luimm5(); + +sreg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_mulsRN.h b/riscv/insns/p_mulsRN.h new file mode 100644 index 0000000000..d7dfccdd05 --- /dev/null +++ b/riscv/insns/p_mulsRN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_muluN.h b/riscv/insns/p_muluN.h new file mode 100644 index 0000000000..539610f750 --- /dev/null +++ b/riscv/insns/p_muluN.h @@ -0,0 +1,9 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +int norm = insn.p_Luimm5(); + +reg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_muluRN.h b/riscv/insns/p_muluRN.h new file mode 100644 index 0000000000..9fd8178615 --- /dev/null +++ b/riscv/insns/p_muluRN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 8f09b558ed..f48fb16968 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -805,10 +805,6 @@ riscv_insn_ext_pulpmacsi = \ p_msu \ riscv_insn_ext_pulpmulrnhi = \ - p_mulhhs \ - p_mulhhu \ - p_muls \ - p_mulu \ p_mulsN \ p_mulsRN \ p_muluN \ @@ -1046,6 +1042,7 @@ riscv_insn_ext_pulpimg = \ $(riscv_insn_ext_pulpabs) \ $(riscv_insn_ext_pulpslet) \ $(riscv_insn_ext_pulpmacsi) \ + $(riscv_insn_ext_pulpmulrnhi) \ $(riscv_insn_ext_pulpmacrnhi) \ $(riscv_insn_ext_pulpminmax) \ $(riscv_insn_ext_pulpbitrev) \ From 99ff721d48e1ac770743f2de71a65277ddcb3128 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 9 Aug 2022 15:38:46 +0200 Subject: [PATCH 17/23] Implement all instructions in addsubrn subset - Add disassembly format - Implement all instructions - Add it to make This implementation is tested against generated tests. --- disasm/disasm.cc | 18 ++++++++++++++++++ riscv/insns/p_addN.h | 7 +++++++ riscv/insns/p_addNr.h | 7 +++++++ riscv/insns/p_addRN.h | 8 ++++++++ riscv/insns/p_addRNr.h | 8 ++++++++ riscv/insns/p_adduN.h | 9 +++++++++ riscv/insns/p_adduNr.h | 9 +++++++++ riscv/insns/p_adduRN.h | 10 ++++++++++ riscv/insns/p_adduRNr.h | 10 ++++++++++ riscv/insns/p_subN.h | 7 +++++++ riscv/insns/p_subNr.h | 7 +++++++ riscv/insns/p_subRN.h | 8 ++++++++ riscv/insns/p_subRNr.h | 8 ++++++++ riscv/insns/p_subuN.h | 9 +++++++++ riscv/insns/p_subuNr.h | 9 +++++++++ riscv/insns/p_subuRN.h | 10 ++++++++++ riscv/insns/p_subuRNr.h | 10 ++++++++++ riscv/riscv.mk.in | 33 +++++++++++++++++---------------- 18 files changed, 171 insertions(+), 16 deletions(-) create mode 100644 riscv/insns/p_addN.h create mode 100644 riscv/insns/p_addNr.h create mode 100644 riscv/insns/p_addRN.h create mode 100644 riscv/insns/p_addRNr.h create mode 100644 riscv/insns/p_adduN.h create mode 100644 riscv/insns/p_adduNr.h create mode 100644 riscv/insns/p_adduRN.h create mode 100644 riscv/insns/p_adduRNr.h create 
mode 100644 riscv/insns/p_subN.h create mode 100644 riscv/insns/p_subNr.h create mode 100644 riscv/insns/p_subRN.h create mode 100644 riscv/insns/p_subRNr.h create mode 100644 riscv/insns/p_subuN.h create mode 100644 riscv/insns/p_subuNr.h create mode 100644 riscv/insns/p_subuRN.h create mode 100644 riscv/insns/p_subuRNr.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 71b1645e05..4cca69f7e5 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1444,6 +1444,24 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PR2ITYPE(p_macsRN); DEFINE_PR2ITYPE(p_machhsRN); + // xpulpaddsubrn + DEFINE_PR2ITYPE(p_addN); + DEFINE_PR2ITYPE(p_adduN); + DEFINE_PR2ITYPE(p_addRN); + DEFINE_PR2ITYPE(p_adduRN); + DEFINE_PR2ITYPE(p_subN); + DEFINE_PR2ITYPE(p_subuN); + DEFINE_PR2ITYPE(p_subRN); + DEFINE_PR2ITYPE(p_subuRN); + DEFINE_RTYPE(p_addNr); + DEFINE_RTYPE(p_adduNr); + DEFINE_RTYPE(p_addRNr); + DEFINE_RTYPE(p_adduRNr); + DEFINE_RTYPE(p_subNr); + DEFINE_RTYPE(p_subuNr); + DEFINE_RTYPE(p_subRNr); + DEFINE_RTYPE(p_subuRNr); + // xpulphwloop DISASM_INSN("lp_starti", lp_starti, 0, {&p_loop, &p_uimmL}); DISASM_INSN("lp_endi", lp_endi, 0, {&p_loop, &p_uimmL}); diff --git a/riscv/insns/p_addN.h b/riscv/insns/p_addN.h new file mode 100644 index 0000000000..303363ee9c --- /dev/null +++ b/riscv/insns/p_addN.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); + +sreg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addNr.h b/riscv/insns/p_addNr.h new file mode 100644 index 0000000000..22018dd3d0 --- /dev/null +++ b/riscv/insns/p_addNr.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +sreg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addRN.h b/riscv/insns/p_addRN.h new file mode 100644 index 0000000000..7fe4082288 --- /dev/null +++ b/riscv/insns/p_addRN.h @@ -0,0 +1,8 @@ +sreg_t 
term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addRNr.h b/riscv/insns/p_addRNr.h new file mode 100644 index 0000000000..a41ec1569f --- /dev/null +++ b/riscv/insns/p_addRNr.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_adduN.h b/riscv/insns/p_adduN.h new file mode 100644 index 0000000000..d1844e2ebf --- /dev/null +++ b/riscv/insns/p_adduN.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); + +reg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_adduNr.h b/riscv/insns/p_adduNr.h new file mode 100644 index 0000000000..e0205d2571 --- /dev/null +++ b/riscv/insns/p_adduNr.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +reg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_adduRN.h b/riscv/insns/p_adduRN.h new file mode 100644 index 0000000000..53734b3c98 --- /dev/null +++ b/riscv/insns/p_adduRN.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_adduRNr.h b/riscv/insns/p_adduRNr.h new file mode 100644 index 0000000000..7ab19b3cd3 --- /dev/null +++ b/riscv/insns/p_adduRNr.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_subN.h b/riscv/insns/p_subN.h new file mode 100644 index 0000000000..dec2151c3a --- /dev/null +++ b/riscv/insns/p_subN.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); + +sreg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subNr.h b/riscv/insns/p_subNr.h new file mode 100644 index 0000000000..e1db45dfcc --- /dev/null +++ b/riscv/insns/p_subNr.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +sreg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subRN.h b/riscv/insns/p_subRN.h new file mode 100644 index 0000000000..d30ddb9ac3 --- /dev/null +++ b/riscv/insns/p_subRN.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subRNr.h b/riscv/insns/p_subRNr.h new file mode 100644 index 0000000000..be5d175f47 --- /dev/null +++ b/riscv/insns/p_subRNr.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subuN.h b/riscv/insns/p_subuN.h new file mode 100644 index 0000000000..3363cfe583 --- /dev/null +++ b/riscv/insns/p_subuN.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); + +reg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. 
+ // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuNr.h b/riscv/insns/p_subuNr.h new file mode 100644 index 0000000000..074ed6c82b --- /dev/null +++ b/riscv/insns/p_subuNr.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +reg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuRN.h b/riscv/insns/p_subuRN.h new file mode 100644 index 0000000000..656c483886 --- /dev/null +++ b/riscv/insns/p_subuRN.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuRNr.h b/riscv/insns/p_subuRNr.h new file mode 100644 index 0000000000..da421a4e08 --- /dev/null +++ b/riscv/insns/p_subuRNr.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index f48fb16968..f927cd3e7d 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1012,22 +1012,22 @@ riscv_insn_ext_pulpclip = \ p_clipur \ riscv_insn_ext_pulpaddsubrn = \ - p_addn \ - p_addnr \ - p_addun \ - p_addunr \ - p_addrn \ - p_addrnr \ - p_addurn \ - p_addurnr \ - p_subn \ - p_subnr \ - p_subun \ - p_subunr \ - p_subrn \ - p_subrnr \ - p_suburn \ - p_suburnr \ + p_addN \ + p_adduN \ + p_addRN \ + p_adduRN \ + p_subN \ + p_subuN \ + p_subRN \ + p_subuRN \ + p_addNr \ + p_adduNr \ + p_addRNr \ + p_adduRNr \ + p_subNr \ + p_subuNr \ + p_subRNr \ + p_subuRNr \ riscv_insn_ext_pulpbr = \ p_beqimm \ @@ -1044,6 +1044,7 @@ riscv_insn_ext_pulpimg = \ $(riscv_insn_ext_pulpmacsi) \ $(riscv_insn_ext_pulpmulrnhi) \ $(riscv_insn_ext_pulpmacrnhi) \ + $(riscv_insn_ext_pulpaddsubrn) \ $(riscv_insn_ext_pulpminmax) \ $(riscv_insn_ext_pulpbitrev) \ $(riscv_insn_ext_pulpbitop) \ From 5d8b61a45d63146a7eb06183bc25b9167dcb0693 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 9 Aug 2022 19:01:29 +0200 Subject: [PATCH 18/23] Implement all instr in vectshufflepack subset - Add disassembly format - Implement all instructions - Add it to make This implementation is tested against generated tests. 
--- disasm/disasm.cc | 12 ++++++++ riscv/insns/pv_pack.h | 7 +++++ riscv/insns/pv_pack_h.h | 7 +++++ riscv/insns/pv_packhi_b.h | 8 +++++ riscv/insns/pv_packlo_b.h | 8 +++++ riscv/insns/pv_shuffle_b.h | 16 ++++++++++ riscv/insns/pv_shuffle_h.h | 11 +++++++ riscv/insns/pv_shuffle_sci_h.h | 11 +++++++ riscv/insns/pv_shufflei0_sci_b.h | 15 +++++++++ riscv/insns/pv_shufflei1_sci_b.h | 15 +++++++++ riscv/insns/pv_shufflei2_sci_b.h | 15 +++++++++ riscv/insns/pv_shufflei3_sci_b.h | 15 +++++++++ riscv/riscv.mk.in | 52 +++++++++++--------------------- 13 files changed, 158 insertions(+), 34 deletions(-) create mode 100644 riscv/insns/pv_pack.h create mode 100644 riscv/insns/pv_pack_h.h create mode 100644 riscv/insns/pv_packhi_b.h create mode 100644 riscv/insns/pv_packlo_b.h create mode 100644 riscv/insns/pv_shuffle_b.h create mode 100644 riscv/insns/pv_shuffle_h.h create mode 100644 riscv/insns/pv_shuffle_sci_h.h create mode 100644 riscv/insns/pv_shufflei0_sci_b.h create mode 100644 riscv/insns/pv_shufflei1_sci_b.h create mode 100644 riscv/insns/pv_shufflei2_sci_b.h create mode 100644 riscv/insns/pv_shufflei3_sci_b.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 4cca69f7e5..83282c4393 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1600,8 +1600,20 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(pv_sdotsp_sc_b); DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + // xpulpvectshufflepack + DEFINE_RTYPE(pv_shuffle_h); + DEFINE_PI1ZTYPE(pv_shuffle_sci_h); + DEFINE_RTYPE(pv_shuffle_b); + DEFINE_PI1ZTYPE(pv_shufflei0_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei1_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei2_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei3_sci_b); DEFINE_RTYPE(pv_shuffle2_h); DEFINE_RTYPE(pv_shuffle2_b); + DEFINE_RTYPE(pv_pack); + DEFINE_RTYPE(pv_pack_h); + DEFINE_RTYPE(pv_packhi_b); + DEFINE_RTYPE(pv_packlo_b); // provide a default disassembly for all instructions as a fallback #define DECLARE_INSN(code, match, mask) \ diff --git a/riscv/insns/pv_pack.h b/riscv/insns/pv_pack.h 
new file mode 100644 index 0000000000..a7c9b69e5b --- /dev/null +++ b/riscv/insns/pv_pack.h @@ -0,0 +1,7 @@ +reg_t src1 = RS1_H(0); +reg_t src2 = RS2_H(0); + +reg_t res = (src1 << 16) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_pack_h.h b/riscv/insns/pv_pack_h.h new file mode 100644 index 0000000000..7ad94faa9d --- /dev/null +++ b/riscv/insns/pv_pack_h.h @@ -0,0 +1,7 @@ +reg_t src1 = RS1_H(1); +reg_t src2 = RS2_H(1); + +reg_t res = (src1 << 16) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_packhi_b.h b/riscv/insns/pv_packhi_b.h new file mode 100644 index 0000000000..b8a7a46c8d --- /dev/null +++ b/riscv/insns/pv_packhi_b.h @@ -0,0 +1,8 @@ +reg_t src1 = RS1_B(0); +reg_t src2 = RS2_B(0); +reg_t res = RD & 0x0FFFF; + +res |= (src1 << 24) | (src2 << 16); + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_packlo_b.h b/riscv/insns/pv_packlo_b.h new file mode 100644 index 0000000000..4606fe1f01 --- /dev/null +++ b/riscv/insns/pv_packlo_b.h @@ -0,0 +1,8 @@ +reg_t src1 = RS1_B(0); +reg_t src2 = RS2_B(0); +reg_t res = RD & 0xFFFF0000; + +res |= (src1 << 8) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shuffle_b.h b/riscv/insns/pv_shuffle_b.h new file mode 100644 index 0000000000..efd7790c81 --- /dev/null +++ b/riscv/insns/pv_shuffle_b.h @@ -0,0 +1,16 @@ +reg_t selector = RS2; + +reg_t sel3 = (selector >> 24)%4; +reg_t sel2 = (selector >> 16)%4; +reg_t sel1 = (selector >> 8)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(sel3); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shuffle_h.h b/riscv/insns/pv_shuffle_h.h new file mode 100644 index 0000000000..3920c9f619 --- /dev/null +++ b/riscv/insns/pv_shuffle_h.h @@ -0,0 +1,11 @@ +reg_t selector = RS2; + +reg_t h_sel = (selector >> 16)%2; +reg_t l_sel = selector%2; +reg_t hhalf = 
RS1_H(h_sel); +reg_t lhalf = RS1_H(l_sel); + +reg_t res = (hhalf << 16) | lhalf; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shuffle_sci_h.h b/riscv/insns/pv_shuffle_sci_h.h new file mode 100644 index 0000000000..2358492738 --- /dev/null +++ b/riscv/insns/pv_shuffle_sci_h.h @@ -0,0 +1,11 @@ +reg_t selector = insn.p_zimm6(); + +reg_t h_sel = (selector >> 1)%2; +reg_t l_sel = selector%2; +reg_t hhalf = RS1_H(h_sel); +reg_t lhalf = RS1_H(l_sel); + +reg_t res = (hhalf << 16) | lhalf; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei0_sci_b.h b/riscv/insns/pv_shufflei0_sci_b.h new file mode 100644 index 0000000000..a1524c8d46 --- /dev/null +++ b/riscv/insns/pv_shufflei0_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(0); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei1_sci_b.h b/riscv/insns/pv_shufflei1_sci_b.h new file mode 100644 index 0000000000..5ccb2c7549 --- /dev/null +++ b/riscv/insns/pv_shufflei1_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(1); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei2_sci_b.h b/riscv/insns/pv_shufflei2_sci_b.h new file mode 100644 index 0000000000..a84166929d --- /dev/null +++ b/riscv/insns/pv_shufflei2_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(2); +reg_t byte2 = RS1_B(sel2); 
+reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei3_sci_b.h b/riscv/insns/pv_shufflei3_sci_b.h new file mode 100644 index 0000000000..89c1d91132 --- /dev/null +++ b/riscv/insns/pv_shufflei3_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(3); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index f927cd3e7d..c4bc8cb4ce 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -991,19 +991,19 @@ riscv_insn_ext_pulpvect = \ pv_sdotsp_sci_b \ riscv_insn_ext_pulpvectshufflepack = \ - pv_shuffle.h \ - pv_shuffle.sci.h \ - pv_shuffle.b \ - pv_shufflei0.sci.b \ - pv_shufflei1.sci.b \ - pv_shufflei2.sci.b \ - pv_shufflei3.sci.b \ - pv_shuffle2.h \ - pv_shuffle2.b \ + pv_shuffle_h \ + pv_shuffle_sci_h \ + pv_shuffle_b \ + pv_shufflei0_sci_b \ + pv_shufflei1_sci_b \ + pv_shufflei2_sci_b \ + pv_shufflei3_sci_b \ + pv_shuffle2_h \ + pv_shuffle2_b \ pv_pack \ - pv_pack.h \ - pv_packhi.b \ - pv_packlo.b \ + pv_pack_h \ + pv_packhi_b \ + pv_packlo_b \ riscv_insn_ext_pulpclip = \ p_clip \ @@ -1037,38 +1037,22 @@ riscv_insn_ext_pulpbitrev = \ p_bitrev \ riscv_insn_ext_pulpimg = \ - $(riscv_insn_ext_pulppostmod) \ $(riscv_insn_ext_pulphwloop) \ + $(riscv_insn_ext_pulppostmod) \ $(riscv_insn_ext_pulpabs) \ $(riscv_insn_ext_pulpslet) \ $(riscv_insn_ext_pulpmacsi) \ $(riscv_insn_ext_pulpmulrnhi) \ $(riscv_insn_ext_pulpmacrnhi) \ - $(riscv_insn_ext_pulpaddsubrn) \ $(riscv_insn_ext_pulpminmax) \ - $(riscv_insn_ext_pulpbitrev) \ $(riscv_insn_ext_pulpbitop) \ $(riscv_insn_ext_pulpvect) \ + $(riscv_insn_ext_pulpvectshufflepack) \ 
$(riscv_insn_ext_pulpclip) \ - $(riscv_insn_ext_pulpbr) - -# riscv_insn_ext_pulpimg = \ -# $(riscv_insn_ext_pulphwloop) \ -# $(riscv_insn_ext_pulppostmod) \ -# $(riscv_insn_ext_pulpabs) \ -# $(riscv_insn_ext_pulpslet) \ -# $(riscv_insn_ext_pulpmacsi) \ -# $(riscv_insn_ext_pulpmulrnhi) \ -# $(riscv_insn_ext_pulpmacrnhi) \ -# $(riscv_insn_ext_pulppartmac) \ -# $(riscv_insn_ext_pulpminmax) \ -# $(riscv_insn_ext_pulpbitop) \ -# $(riscv_insn_ext_pulpvect) \ -# $(riscv_insn_ext_pulpvectshufflepack) \ -# $(riscv_insn_ext_pulpclip) \ -# $(riscv_insn_ext_pulpaddsubrn) \ -# $(riscv_insn_ext_pulpbr) \ -# $(riscv_insn_ext_pulpbitrev) \ + $(riscv_insn_ext_pulpaddsubrn) \ + $(riscv_insn_ext_pulpbr) \ + $(riscv_insn_ext_pulpbitrev) \ +# $(riscv_insn_ext_pulppartmac) \ riscv_insn_ext_h = \ From 44345c4adf326b81be7fc1f124962e945ceb33ab Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 15 Aug 2022 12:59:07 +0200 Subject: [PATCH 19/23] Fix disassembly of Luimm5/luimm5 ops --- disasm/disasm.cc | 69 ++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 83282c4393..7dd2cd250a 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -373,6 +373,11 @@ struct : public arg_t { } } p_loop; +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_Luimm5()); + } +} p_Luimm5; struct : public arg_t { std::string to_string(insn_t insn) const { @@ -511,9 +516,9 @@ disassembler_t::disassembler_t(int xlen) #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) - #define DEFINE_PI2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5, &p_zimm5}) + #define DEFINE_PLUI2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_Luimm5, &p_zimm5}) 
#define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) - #define DEFINE_PR2ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2, &p_simm5}) + #define DEFINE_PR2LUITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2, &p_Luimm5}) DEFINE_XLOAD(lb) DEFINE_XLOAD(lbu) @@ -1400,15 +1405,15 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(p_msu); // xpulpbitop - DEFINE_PI2TYPE(p_extract); + DEFINE_PLUI2TYPE(p_extract); DEFINE_RTYPE(p_extractr); - DEFINE_PI2TYPE(p_extractu); + DEFINE_PLUI2TYPE(p_extractu); DEFINE_RTYPE(p_extractur); - DEFINE_PI2TYPE(p_insert); + DEFINE_PLUI2TYPE(p_insert); DEFINE_RTYPE(p_insertr); - DEFINE_PI2TYPE(p_bset); + DEFINE_PLUI2TYPE(p_bset); DEFINE_RTYPE(p_bsetr); - DEFINE_PI2TYPE(p_bclr); + DEFINE_PLUI2TYPE(p_bclr); DEFINE_RTYPE(p_bclrr); // xpulpbitopsmall (subset of xpulpbitop) DEFINE_R1TYPE(p_exths); @@ -1422,37 +1427,37 @@ disassembler_t::disassembler_t(int xlen) DEFINE_R1TYPE(p_cnt); // xpulpbitrev - DEFINE_PI2TYPE(p_bitrev); + DEFINE_PLUI2TYPE(p_bitrev); // xpulpmulrnhi - DEFINE_PR2ITYPE(p_muluN); - DEFINE_PR2ITYPE(p_mulhhuN); - DEFINE_PR2ITYPE(p_mulsN); - DEFINE_PR2ITYPE(p_mulhhsN); - DEFINE_PR2ITYPE(p_muluRN); - DEFINE_PR2ITYPE(p_mulhhuRN); - DEFINE_PR2ITYPE(p_mulsRN); - DEFINE_PR2ITYPE(p_mulhhsRN); + DEFINE_PR2LUITYPE(p_muluN); + DEFINE_PR2LUITYPE(p_mulhhuN); + DEFINE_PR2LUITYPE(p_mulsN); + DEFINE_PR2LUITYPE(p_mulhhsN); + DEFINE_PR2LUITYPE(p_muluRN); + DEFINE_PR2LUITYPE(p_mulhhuRN); + DEFINE_PR2LUITYPE(p_mulsRN); + DEFINE_PR2LUITYPE(p_mulhhsRN); // xpulpmacrnhi - DEFINE_PR2ITYPE(p_macuN); - DEFINE_PR2ITYPE(p_machhuN); - DEFINE_PR2ITYPE(p_macsN); - DEFINE_PR2ITYPE(p_machhsN); - DEFINE_PR2ITYPE(p_macuRN); - DEFINE_PR2ITYPE(p_machhuRN); - DEFINE_PR2ITYPE(p_macsRN); - DEFINE_PR2ITYPE(p_machhsRN); + DEFINE_PR2LUITYPE(p_macuN); + DEFINE_PR2LUITYPE(p_machhuN); + DEFINE_PR2LUITYPE(p_macsN); + DEFINE_PR2LUITYPE(p_machhsN); + DEFINE_PR2LUITYPE(p_macuRN); + 
DEFINE_PR2LUITYPE(p_machhuRN); + DEFINE_PR2LUITYPE(p_macsRN); + DEFINE_PR2LUITYPE(p_machhsRN); // xpulpaddsubrn - DEFINE_PR2ITYPE(p_addN); - DEFINE_PR2ITYPE(p_adduN); - DEFINE_PR2ITYPE(p_addRN); - DEFINE_PR2ITYPE(p_adduRN); - DEFINE_PR2ITYPE(p_subN); - DEFINE_PR2ITYPE(p_subuN); - DEFINE_PR2ITYPE(p_subRN); - DEFINE_PR2ITYPE(p_subuRN); + DEFINE_PR2LUITYPE(p_addN); + DEFINE_PR2LUITYPE(p_adduN); + DEFINE_PR2LUITYPE(p_addRN); + DEFINE_PR2LUITYPE(p_adduRN); + DEFINE_PR2LUITYPE(p_subN); + DEFINE_PR2LUITYPE(p_subuN); + DEFINE_PR2LUITYPE(p_subRN); + DEFINE_PR2LUITYPE(p_subuRN); DEFINE_RTYPE(p_addNr); DEFINE_RTYPE(p_adduNr); DEFINE_RTYPE(p_addRNr); From c75a8c332312efca1b3797f4f8cd05692dd1174f Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 2 Sep 2022 11:25:59 +0200 Subject: [PATCH 20/23] Implement remaining instr in vect subset - Added union for easier SIMD handling - Add disassembly format - Implement remaining instructions (all compare instrs, add_div{2,4,8} and sub_div{2,4,8}) - Add remaining instrs to make This implementation is tested against generated tests.
--- disasm/disasm.cc | 93 ++++++++++++++++++++++++++++++----- riscv/decode.h | 14 ++++++ riscv/insns/pv_add_h_div2.h | 18 +++++++ riscv/insns/pv_add_h_div4.h | 18 +++++++ riscv/insns/pv_add_h_div8.h | 18 +++++++ riscv/insns/pv_cmpeq_b.h | 12 +++++ riscv/insns/pv_cmpeq_h.h | 13 +++++ riscv/insns/pv_cmpeq_sc_b.h | 13 +++++ riscv/insns/pv_cmpeq_sc_h.h | 13 +++++ riscv/insns/pv_cmpeq_sci_b.h | 13 +++++ riscv/insns/pv_cmpeq_sci_h.h | 13 +++++ riscv/insns/pv_cmpge_b.h | 12 +++++ riscv/insns/pv_cmpge_h.h | 13 +++++ riscv/insns/pv_cmpge_sc_b.h | 13 +++++ riscv/insns/pv_cmpge_sc_h.h | 13 +++++ riscv/insns/pv_cmpge_sci_b.h | 13 +++++ riscv/insns/pv_cmpge_sci_h.h | 13 +++++ riscv/insns/pv_cmpgeu_b.h | 12 +++++ riscv/insns/pv_cmpgeu_h.h | 13 +++++ riscv/insns/pv_cmpgeu_sc_b.h | 13 +++++ riscv/insns/pv_cmpgeu_sc_h.h | 13 +++++ riscv/insns/pv_cmpgeu_sci_b.h | 13 +++++ riscv/insns/pv_cmpgeu_sci_h.h | 13 +++++ riscv/insns/pv_cmpgt_b.h | 12 +++++ riscv/insns/pv_cmpgt_h.h | 13 +++++ riscv/insns/pv_cmpgt_sc_b.h | 13 +++++ riscv/insns/pv_cmpgt_sc_h.h | 13 +++++ riscv/insns/pv_cmpgt_sci_b.h | 13 +++++ riscv/insns/pv_cmpgt_sci_h.h | 13 +++++ riscv/insns/pv_cmpgtu_b.h | 12 +++++ riscv/insns/pv_cmpgtu_h.h | 13 +++++ riscv/insns/pv_cmpgtu_sc_b.h | 13 +++++ riscv/insns/pv_cmpgtu_sc_h.h | 13 +++++ riscv/insns/pv_cmpgtu_sci_b.h | 13 +++++ riscv/insns/pv_cmpgtu_sci_h.h | 13 +++++ riscv/insns/pv_cmple_b.h | 12 +++++ riscv/insns/pv_cmple_h.h | 13 +++++ riscv/insns/pv_cmple_sc_b.h | 13 +++++ riscv/insns/pv_cmple_sc_h.h | 13 +++++ riscv/insns/pv_cmple_sci_b.h | 13 +++++ riscv/insns/pv_cmple_sci_h.h | 13 +++++ riscv/insns/pv_cmpleu_b.h | 12 +++++ riscv/insns/pv_cmpleu_h.h | 13 +++++ riscv/insns/pv_cmpleu_sc_b.h | 13 +++++ riscv/insns/pv_cmpleu_sc_h.h | 13 +++++ riscv/insns/pv_cmpleu_sci_b.h | 13 +++++ riscv/insns/pv_cmpleu_sci_h.h | 13 +++++ riscv/insns/pv_cmplt_b.h | 12 +++++ riscv/insns/pv_cmplt_h.h | 13 +++++ riscv/insns/pv_cmplt_sc_b.h | 13 +++++ riscv/insns/pv_cmplt_sc_h.h | 13 +++++ 
riscv/insns/pv_cmplt_sci_b.h | 13 +++++ riscv/insns/pv_cmplt_sci_h.h | 13 +++++ riscv/insns/pv_cmpltu_b.h | 12 +++++ riscv/insns/pv_cmpltu_h.h | 13 +++++ riscv/insns/pv_cmpltu_sc_b.h | 13 +++++ riscv/insns/pv_cmpltu_sc_h.h | 13 +++++ riscv/insns/pv_cmpltu_sci_b.h | 13 +++++ riscv/insns/pv_cmpltu_sci_h.h | 13 +++++ riscv/insns/pv_cmpne_b.h | 12 +++++ riscv/insns/pv_cmpne_h.h | 13 +++++ riscv/insns/pv_cmpne_sc_b.h | 13 +++++ riscv/insns/pv_cmpne_sc_h.h | 13 +++++ riscv/insns/pv_cmpne_sci_b.h | 11 +++++ riscv/insns/pv_cmpne_sci_h.h | 13 +++++ riscv/insns/pv_sub_h_div2.h | 18 +++++++ riscv/insns/pv_sub_h_div4.h | 18 +++++++ riscv/insns/pv_sub_h_div8.h | 18 +++++++ riscv/riscv.mk.in | 66 +++++++++++++++++++++++++ 69 files changed, 1038 insertions(+), 11 deletions(-) create mode 100644 riscv/insns/pv_add_h_div2.h create mode 100644 riscv/insns/pv_add_h_div4.h create mode 100644 riscv/insns/pv_add_h_div8.h create mode 100644 riscv/insns/pv_cmpeq_b.h create mode 100644 riscv/insns/pv_cmpeq_h.h create mode 100644 riscv/insns/pv_cmpeq_sc_b.h create mode 100644 riscv/insns/pv_cmpeq_sc_h.h create mode 100644 riscv/insns/pv_cmpeq_sci_b.h create mode 100644 riscv/insns/pv_cmpeq_sci_h.h create mode 100644 riscv/insns/pv_cmpge_b.h create mode 100644 riscv/insns/pv_cmpge_h.h create mode 100644 riscv/insns/pv_cmpge_sc_b.h create mode 100644 riscv/insns/pv_cmpge_sc_h.h create mode 100644 riscv/insns/pv_cmpge_sci_b.h create mode 100644 riscv/insns/pv_cmpge_sci_h.h create mode 100644 riscv/insns/pv_cmpgeu_b.h create mode 100644 riscv/insns/pv_cmpgeu_h.h create mode 100644 riscv/insns/pv_cmpgeu_sc_b.h create mode 100644 riscv/insns/pv_cmpgeu_sc_h.h create mode 100644 riscv/insns/pv_cmpgeu_sci_b.h create mode 100644 riscv/insns/pv_cmpgeu_sci_h.h create mode 100644 riscv/insns/pv_cmpgt_b.h create mode 100644 riscv/insns/pv_cmpgt_h.h create mode 100644 riscv/insns/pv_cmpgt_sc_b.h create mode 100644 riscv/insns/pv_cmpgt_sc_h.h create mode 100644 riscv/insns/pv_cmpgt_sci_b.h create mode 
100644 riscv/insns/pv_cmpgt_sci_h.h create mode 100644 riscv/insns/pv_cmpgtu_b.h create mode 100644 riscv/insns/pv_cmpgtu_h.h create mode 100644 riscv/insns/pv_cmpgtu_sc_b.h create mode 100644 riscv/insns/pv_cmpgtu_sc_h.h create mode 100644 riscv/insns/pv_cmpgtu_sci_b.h create mode 100644 riscv/insns/pv_cmpgtu_sci_h.h create mode 100644 riscv/insns/pv_cmple_b.h create mode 100644 riscv/insns/pv_cmple_h.h create mode 100644 riscv/insns/pv_cmple_sc_b.h create mode 100644 riscv/insns/pv_cmple_sc_h.h create mode 100644 riscv/insns/pv_cmple_sci_b.h create mode 100644 riscv/insns/pv_cmple_sci_h.h create mode 100644 riscv/insns/pv_cmpleu_b.h create mode 100644 riscv/insns/pv_cmpleu_h.h create mode 100644 riscv/insns/pv_cmpleu_sc_b.h create mode 100644 riscv/insns/pv_cmpleu_sc_h.h create mode 100644 riscv/insns/pv_cmpleu_sci_b.h create mode 100644 riscv/insns/pv_cmpleu_sci_h.h create mode 100644 riscv/insns/pv_cmplt_b.h create mode 100644 riscv/insns/pv_cmplt_h.h create mode 100644 riscv/insns/pv_cmplt_sc_b.h create mode 100644 riscv/insns/pv_cmplt_sc_h.h create mode 100644 riscv/insns/pv_cmplt_sci_b.h create mode 100644 riscv/insns/pv_cmplt_sci_h.h create mode 100644 riscv/insns/pv_cmpltu_b.h create mode 100644 riscv/insns/pv_cmpltu_h.h create mode 100644 riscv/insns/pv_cmpltu_sc_b.h create mode 100644 riscv/insns/pv_cmpltu_sc_h.h create mode 100644 riscv/insns/pv_cmpltu_sci_b.h create mode 100644 riscv/insns/pv_cmpltu_sci_h.h create mode 100644 riscv/insns/pv_cmpne_b.h create mode 100644 riscv/insns/pv_cmpne_h.h create mode 100644 riscv/insns/pv_cmpne_sc_b.h create mode 100644 riscv/insns/pv_cmpne_sc_h.h create mode 100644 riscv/insns/pv_cmpne_sci_b.h create mode 100644 riscv/insns/pv_cmpne_sci_h.h create mode 100644 riscv/insns/pv_sub_h_div2.h create mode 100644 riscv/insns/pv_sub_h_div4.h create mode 100644 riscv/insns/pv_sub_h_div8.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 7dd2cd250a..094b547bc2 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ 
-1475,18 +1475,25 @@ disassembler_t::disassembler_t(int xlen) DISASM_INSN("lp_setup", lp_setup, 0, {&p_loop, &xrs1, &p_uimmL}); DISASM_INSN("lp_setupi", lp_setupi, 0, {&p_loop, &p_uimmL, &p_uimmS}); + // xpulpvect DEFINE_RTYPE(pv_add_h); DEFINE_RTYPE(pv_add_sc_h); DEFINE_PI1STYPE(pv_add_sci_h); DEFINE_RTYPE(pv_add_b); DEFINE_RTYPE(pv_add_sc_b); DEFINE_PI1STYPE(pv_add_sci_b); + DEFINE_RTYPE(pv_add_h_div2); + DEFINE_RTYPE(pv_add_h_div4); + DEFINE_RTYPE(pv_add_h_div8); DEFINE_RTYPE(pv_sub_h); DEFINE_RTYPE(pv_sub_sc_h); DEFINE_PI1STYPE(pv_sub_sci_h); DEFINE_RTYPE(pv_sub_b); DEFINE_RTYPE(pv_sub_sc_b); DEFINE_PI1STYPE(pv_sub_sci_b); + DEFINE_RTYPE(pv_sub_h_div2); + DEFINE_RTYPE(pv_sub_h_div4); + DEFINE_RTYPE(pv_sub_h_div8); DEFINE_RTYPE(pv_avg_h); DEFINE_RTYPE(pv_avg_sc_h); DEFINE_PI1STYPE(pv_avg_sci_h); @@ -1499,6 +1506,7 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(pv_avgu_b); DEFINE_RTYPE(pv_avgu_sc_b); DEFINE_PI1ZTYPE(pv_avgu_sci_b); + DEFINE_RTYPE(pv_min_h); DEFINE_RTYPE(pv_min_sc_h); DEFINE_PI1STYPE(pv_min_sci_h); @@ -1523,6 +1531,10 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(pv_maxu_b); DEFINE_RTYPE(pv_maxu_sc_b); DEFINE_PI1ZTYPE(pv_maxu_sci_b); + DEFINE_PI1ZTYPE(pv_and_sci_b); + DEFINE_R1TYPE(pv_abs_h); + DEFINE_R1TYPE(pv_abs_b); + DEFINE_RTYPE(pv_srl_h); DEFINE_RTYPE(pv_srl_sc_h); DEFINE_PI1ZTYPE(pv_srl_sci_h); @@ -1558,9 +1570,7 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PI1ZTYPE(pv_and_sci_h); DEFINE_RTYPE(pv_and_b); DEFINE_RTYPE(pv_and_sc_b); - DEFINE_PI1ZTYPE(pv_and_sci_b); - DEFINE_R1TYPE(pv_abs_h); - DEFINE_R1TYPE(pv_abs_b); + DEFINE_PI1ZTYPE(pv_extract_h); DEFINE_PI1ZTYPE(pv_extract_b); DEFINE_PI1ZTYPE(pv_extractu_h); @@ -1576,16 +1586,16 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PI1ZTYPE(pv_dotup_sci_b); DEFINE_RTYPE(pv_dotusp_h); DEFINE_RTYPE(pv_dotusp_sc_h); - DEFINE_PI1ZTYPE(pv_dotusp_sci_h); + DEFINE_PI1STYPE(pv_dotusp_sci_h); DEFINE_RTYPE(pv_dotusp_b); DEFINE_RTYPE(pv_dotusp_sc_b); -
DEFINE_PI1ZTYPE(pv_dotusp_sci_b); + DEFINE_PI1STYPE(pv_dotusp_sci_b); DEFINE_RTYPE(pv_dotsp_h); DEFINE_RTYPE(pv_dotsp_sc_h); - DEFINE_PI1ZTYPE(pv_dotsp_sci_h); + DEFINE_PI1STYPE(pv_dotsp_sci_h); DEFINE_RTYPE(pv_dotsp_b); DEFINE_RTYPE(pv_dotsp_sc_b); - DEFINE_PI1ZTYPE(pv_dotsp_sci_b); + DEFINE_PI1STYPE(pv_dotsp_sci_b); DEFINE_RTYPE(pv_sdotup_h); DEFINE_RTYPE(pv_sdotup_sc_h); DEFINE_PI1ZTYPE(pv_sdotup_sci_h); @@ -1594,16 +1604,77 @@ disassembler_t::disassembler_t(int xlen) DEFINE_PI1ZTYPE(pv_sdotup_sci_b); DEFINE_RTYPE(pv_sdotusp_h); DEFINE_RTYPE(pv_sdotusp_sc_h); - DEFINE_PI1ZTYPE(pv_sdotusp_sci_h); + DEFINE_PI1STYPE(pv_sdotusp_sci_h); DEFINE_RTYPE(pv_sdotusp_b); DEFINE_RTYPE(pv_sdotusp_sc_b); - DEFINE_PI1ZTYPE(pv_sdotusp_sci_b); + DEFINE_PI1STYPE(pv_sdotusp_sci_b); DEFINE_RTYPE(pv_sdotsp_h); DEFINE_RTYPE(pv_sdotsp_sc_h); - DEFINE_PI1ZTYPE(pv_sdotsp_sci_h); + DEFINE_PI1STYPE(pv_sdotsp_sci_h); DEFINE_RTYPE(pv_sdotsp_b); DEFINE_RTYPE(pv_sdotsp_sc_b); - DEFINE_PI1ZTYPE(pv_sdotsp_sci_b); + DEFINE_PI1STYPE(pv_sdotsp_sci_b); + + DEFINE_RTYPE(pv_cmpeq_h); + DEFINE_RTYPE(pv_cmpeq_sc_h); + DEFINE_PI1ZTYPE(pv_cmpeq_sci_h); + DEFINE_RTYPE(pv_cmpeq_b); + DEFINE_RTYPE(pv_cmpeq_sc_b); + DEFINE_PI1ZTYPE(pv_cmpeq_sci_b); + DEFINE_RTYPE(pv_cmpne_h); + DEFINE_RTYPE(pv_cmpne_sc_h); + DEFINE_PI1ZTYPE(pv_cmpne_sci_h); + DEFINE_RTYPE(pv_cmpne_b); + DEFINE_RTYPE(pv_cmpne_sc_b); + DEFINE_PI1ZTYPE(pv_cmpne_sci_b); + DEFINE_RTYPE(pv_cmpgt_h); + DEFINE_RTYPE(pv_cmpgt_sc_h); + DEFINE_PI1STYPE(pv_cmpgt_sci_h); + DEFINE_RTYPE(pv_cmpgt_b); + DEFINE_RTYPE(pv_cmpgt_sc_b); + DEFINE_PI1STYPE(pv_cmpgt_sci_b); + DEFINE_RTYPE(pv_cmpge_h); + DEFINE_RTYPE(pv_cmpge_sc_h); + DEFINE_PI1STYPE(pv_cmpge_sci_h); + DEFINE_RTYPE(pv_cmpge_b); + DEFINE_RTYPE(pv_cmpge_sc_b); + DEFINE_PI1STYPE(pv_cmpge_sci_b); + DEFINE_RTYPE(pv_cmplt_h); + DEFINE_RTYPE(pv_cmplt_sc_h); + DEFINE_PI1STYPE(pv_cmplt_sci_h); + DEFINE_RTYPE(pv_cmplt_b); + DEFINE_RTYPE(pv_cmplt_sc_b); + DEFINE_PI1STYPE(pv_cmplt_sci_b); + 
DEFINE_RTYPE(pv_cmple_h); + DEFINE_RTYPE(pv_cmple_sc_h); + DEFINE_PI1STYPE(pv_cmple_sci_h); + DEFINE_RTYPE(pv_cmple_b); + DEFINE_RTYPE(pv_cmple_sc_b); + DEFINE_PI1STYPE(pv_cmple_sci_b); + DEFINE_RTYPE(pv_cmpgtu_h); + DEFINE_RTYPE(pv_cmpgtu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpgtu_sci_h); + DEFINE_RTYPE(pv_cmpgtu_b); + DEFINE_RTYPE(pv_cmpgtu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpgtu_sci_b); + DEFINE_RTYPE(pv_cmpgeu_h); + DEFINE_RTYPE(pv_cmpgeu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpgeu_sci_h); + DEFINE_RTYPE(pv_cmpgeu_b); + DEFINE_RTYPE(pv_cmpgeu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpgeu_sci_b); + DEFINE_RTYPE(pv_cmpltu_h); + DEFINE_RTYPE(pv_cmpltu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpltu_sci_h); + DEFINE_RTYPE(pv_cmpltu_b); + DEFINE_RTYPE(pv_cmpltu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpltu_sci_b); + DEFINE_RTYPE(pv_cmpleu_h); + DEFINE_RTYPE(pv_cmpleu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpleu_sci_h); + DEFINE_RTYPE(pv_cmpleu_b); + DEFINE_RTYPE(pv_cmpleu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpleu_sci_b); // xpulpvectshufflepack DEFINE_RTYPE(pv_shuffle_h); diff --git a/riscv/decode.h b/riscv/decode.h index 42100503a5..3bb74996b4 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -397,6 +397,20 @@ inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;} inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;} inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;} + +// Interpret register as packed SIMD +union simd_reg { + reg_t reg; + sreg_t sreg; + // halfwords (signed and unsigned) + int16_t h[4]; + uint16_t hu[4]; + // bytes (signed and unsigned) + int8_t b[8]; + uint8_t bu[8]; +}; + + // Vector macros #define e8 8 // 8b elements #define e16 16 // 16b elements diff --git a/riscv/insns/pv_add_h_div2.h b/riscv/insns/pv_add_h_div2.h new file mode 100644 index 0000000000..d09a9e41f5 --- /dev/null +++ b/riscv/insns/pv_add_h_div2.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = 
RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 1 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 1; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_add_h_div4.h b/riscv/insns/pv_add_h_div4.h new file mode 100644 index 0000000000..0dc157025c --- /dev/null +++ b/riscv/insns/pv_add_h_div4.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 2 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 2; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_add_h_div8.h b/riscv/insns/pv_add_h_div8.h new file mode 100644 index 0000000000..5abd486118 --- /dev/null +++ b/riscv/insns/pv_add_h_div8.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 3 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 3; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpeq_b.h b/riscv/insns/pv_cmpeq_b.h new file mode 100644 index 0000000000..b2e912cb1f --- /dev/null +++ b/riscv/insns/pv_cmpeq_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]) ? -1 : 0; +} + 
+WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_h.h b/riscv/insns/pv_cmpeq_h.h new file mode 100644 index 0000000000..6524f55b54 --- /dev/null +++ b/riscv/insns/pv_cmpeq_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpeq_sc_b.h b/riscv/insns/pv_cmpeq_sc_b.h new file mode 100644 index 0000000000..6103f22750 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sc_h.h b/riscv/insns/pv_cmpeq_sc_h.h new file mode 100644 index 0000000000..82eecef2b9 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sci_b.h b/riscv/insns/pv_cmpeq_sci_b.h new file mode 100644 index 0000000000..8ec09d3702 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand 
+union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sci_h.h b/riscv/insns/pv_cmpeq_sci_h.h new file mode 100644 index 0000000000..1079f724bc --- /dev/null +++ b/riscv/insns/pv_cmpeq_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_b.h b/riscv/insns/pv_cmpge_b.h new file mode 100644 index 0000000000..fe8a2f3871 --- /dev/null +++ b/riscv/insns/pv_cmpge_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_h.h b/riscv/insns/pv_cmpge_h.h new file mode 100644 index 0000000000..f6d8883637 --- /dev/null +++ b/riscv/insns/pv_cmpge_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpge_sc_b.h b/riscv/insns/pv_cmpge_sc_b.h new file mode 100644 index 0000000000..5471af6961 --- /dev/null +++ b/riscv/insns/pv_cmpge_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd 
lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sc_h.h b/riscv/insns/pv_cmpge_sc_h.h new file mode 100644 index 0000000000..91a7f412ab --- /dev/null +++ b/riscv/insns/pv_cmpge_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sci_b.h b/riscv/insns/pv_cmpge_sci_b.h new file mode 100644 index 0000000000..1f6fa2d92b --- /dev/null +++ b/riscv/insns/pv_cmpge_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sci_h.h b/riscv/insns/pv_cmpge_sci_h.h new file mode 100644 index 0000000000..6a1ce3db51 --- /dev/null +++ b/riscv/insns/pv_cmpge_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= 
src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_b.h b/riscv/insns/pv_cmpgeu_b.h new file mode 100644 index 0000000000..e587e93361 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_h.h b/riscv/insns/pv_cmpgeu_h.h new file mode 100644 index 0000000000..ab67fff8c0 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgeu_sc_b.h b/riscv/insns/pv_cmpgeu_sc_b.h new file mode 100644 index 0000000000..a364f6c903 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sc_h.h b/riscv/insns/pv_cmpgeu_sc_h.h new file mode 100644 index 0000000000..f8898f4fab --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 
0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sci_b.h b/riscv/insns/pv_cmpgeu_sci_b.h new file mode 100644 index 0000000000..e4840524f0 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sci_h.h b/riscv/insns/pv_cmpgeu_sci_h.h new file mode 100644 index 0000000000..09eb4dd8ab --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_b.h b/riscv/insns/pv_cmpgt_b.h new file mode 100644 index 0000000000..c2c4ba30e9 --- /dev/null +++ b/riscv/insns/pv_cmpgt_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_h.h b/riscv/insns/pv_cmpgt_h.h new file mode 100644 index 0000000000..338b923ece --- /dev/null +++ b/riscv/insns/pv_cmpgt_h.h @@ 
-0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgt_sc_b.h b/riscv/insns/pv_cmpgt_sc_b.h new file mode 100644 index 0000000000..3dd660c2be --- /dev/null +++ b/riscv/insns/pv_cmpgt_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_sc_h.h b/riscv/insns/pv_cmpgt_sc_h.h new file mode 100644 index 0000000000..77258f332e --- /dev/null +++ b/riscv/insns/pv_cmpgt_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_sci_b.h b/riscv/insns/pv_cmpgt_sci_b.h new file mode 100644 index 0000000000..597f1c7af0 --- /dev/null +++ b/riscv/insns/pv_cmpgt_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at 
end of file diff --git a/riscv/insns/pv_cmpgt_sci_h.h b/riscv/insns/pv_cmpgt_sci_h.h new file mode 100644 index 0000000000..5281a3c409 --- /dev/null +++ b/riscv/insns/pv_cmpgt_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_b.h b/riscv/insns/pv_cmpgtu_b.h new file mode 100644 index 0000000000..49a34a6d71 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_h.h b/riscv/insns/pv_cmpgtu_h.h new file mode 100644 index 0000000000..a83073fa8c --- /dev/null +++ b/riscv/insns/pv_cmpgtu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgtu_sc_b.h b/riscv/insns/pv_cmpgtu_sc_b.h new file mode 100644 index 0000000000..641d11040b --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; 
i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sc_h.h b/riscv/insns/pv_cmpgtu_sc_h.h new file mode 100644 index 0000000000..290d0a9795 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sci_b.h b/riscv/insns/pv_cmpgtu_sci_b.h new file mode 100644 index 0000000000..acf42fa6b7 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sci_h.h b/riscv/insns/pv_cmpgtu_sci_h.h new file mode 100644 index 0000000000..7947ab2dea --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_b.h b/riscv/insns/pv_cmple_b.h new file mode 100644 index 0000000000..cef9591bfe --- /dev/null +++ 
b/riscv/insns/pv_cmple_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_h.h b/riscv/insns/pv_cmple_h.h new file mode 100644 index 0000000000..f10b555558 --- /dev/null +++ b/riscv/insns/pv_cmple_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmple_sc_b.h b/riscv/insns/pv_cmple_sc_b.h new file mode 100644 index 0000000000..3975bc9047 --- /dev/null +++ b/riscv/insns/pv_cmple_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sc_h.h b/riscv/insns/pv_cmple_sc_h.h new file mode 100644 index 0000000000..f4612313dc --- /dev/null +++ b/riscv/insns/pv_cmple_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sci_b.h 
b/riscv/insns/pv_cmple_sci_b.h new file mode 100644 index 0000000000..54a5c1f830 --- /dev/null +++ b/riscv/insns/pv_cmple_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sci_h.h b/riscv/insns/pv_cmple_sci_h.h new file mode 100644 index 0000000000..158616e96f --- /dev/null +++ b/riscv/insns/pv_cmple_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_b.h b/riscv/insns/pv_cmpleu_b.h new file mode 100644 index 0000000000..038c4ff7ab --- /dev/null +++ b/riscv/insns/pv_cmpleu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_h.h b/riscv/insns/pv_cmpleu_h.h new file mode 100644 index 0000000000..7f0577b409 --- /dev/null +++ b/riscv/insns/pv_cmpleu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + 
res.h[i] = (src1.hu[i] <= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpleu_sc_b.h b/riscv/insns/pv_cmpleu_sc_b.h new file mode 100644 index 0000000000..92232bf865 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sc_h.h b/riscv/insns/pv_cmpleu_sc_h.h new file mode 100644 index 0000000000..19e8a957a0 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] <= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sci_b.h b/riscv/insns/pv_cmpleu_sci_b.h new file mode 100644 index 0000000000..d400010f6f --- /dev/null +++ b/riscv/insns/pv_cmpleu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sci_h.h b/riscv/insns/pv_cmpleu_sci_h.h new file mode 100644 index 0000000000..dbd2ca7940 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sci_h.h @@ -0,0 +1,13 @@ 
+// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] <= src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_b.h b/riscv/insns/pv_cmplt_b.h new file mode 100644 index 0000000000..5e1dda25ee --- /dev/null +++ b/riscv/insns/pv_cmplt_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_h.h b/riscv/insns/pv_cmplt_h.h new file mode 100644 index 0000000000..d6cad0c119 --- /dev/null +++ b/riscv/insns/pv_cmplt_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmplt_sc_b.h b/riscv/insns/pv_cmplt_sc_b.h new file mode 100644 index 0000000000..d6d7d6823f --- /dev/null +++ b/riscv/insns/pv_cmplt_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sc_h.h b/riscv/insns/pv_cmplt_sc_h.h new file mode 100644 index 
0000000000..6d0b77f8eb --- /dev/null +++ b/riscv/insns/pv_cmplt_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sci_b.h b/riscv/insns/pv_cmplt_sci_b.h new file mode 100644 index 0000000000..1382c7b43b --- /dev/null +++ b/riscv/insns/pv_cmplt_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sci_h.h b/riscv/insns/pv_cmplt_sci_h.h new file mode 100644 index 0000000000..43a060c523 --- /dev/null +++ b/riscv/insns/pv_cmplt_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_b.h b/riscv/insns/pv_cmpltu_b.h new file mode 100644 index 0000000000..fae2d4265f --- /dev/null +++ b/riscv/insns/pv_cmpltu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; 
i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_h.h b/riscv/insns/pv_cmpltu_h.h new file mode 100644 index 0000000000..932f2747c0 --- /dev/null +++ b/riscv/insns/pv_cmpltu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpltu_sc_b.h b/riscv/insns/pv_cmpltu_sc_b.h new file mode 100644 index 0000000000..b29e612da8 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sc_h.h b/riscv/insns/pv_cmpltu_sc_h.h new file mode 100644 index 0000000000..0c3ee69195 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sci_b.h b/riscv/insns/pv_cmpltu_sci_b.h new file mode 100644 index 0000000000..67ff64c89e --- /dev/null +++ b/riscv/insns/pv_cmpltu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union 
simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sci_h.h b/riscv/insns/pv_cmpltu_sci_h.h new file mode 100644 index 0000000000..edff0c5f52 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_b.h b/riscv/insns/pv_cmpne_b.h new file mode 100644 index 0000000000..4f8763897a --- /dev/null +++ b/riscv/insns/pv_cmpne_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_h.h b/riscv/insns/pv_cmpne_h.h new file mode 100644 index 0000000000..8f74a35562 --- /dev/null +++ b/riscv/insns/pv_cmpne_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != src2.h[i]) ? -1 : 0; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpne_sc_b.h b/riscv/insns/pv_cmpne_sc_b.h new file mode 100644 index 0000000000..c94538bbe5 --- /dev/null +++ b/riscv/insns/pv_cmpne_sc_b.h @@ 
-0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sc_h.h b/riscv/insns/pv_cmpne_sc_h.h new file mode 100644 index 0000000000..f79334cca5 --- /dev/null +++ b/riscv/insns/pv_cmpne_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sci_b.h b/riscv/insns/pv_cmpne_sci_b.h new file mode 100644 index 0000000000..1930903b60 --- /dev/null +++ b/riscv/insns/pv_cmpne_sci_b.h @@ -0,0 +1,11 @@ +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sci_h.h b/riscv/insns/pv_cmpne_sci_h.h new file mode 100644 index 0000000000..1a2b03671b --- /dev/null +++ b/riscv/insns/pv_cmpne_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != 
src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_sub_h_div2.h b/riscv/insns/pv_sub_h_div2.h new file mode 100644 index 0000000000..a5ea860a0a --- /dev/null +++ b/riscv/insns/pv_sub_h_div2.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 1 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] - src2.h[i]; + res.h[i] = temp >> 1; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_sub_h_div4.h b/riscv/insns/pv_sub_h_div4.h new file mode 100644 index 0000000000..8615108286 --- /dev/null +++ b/riscv/insns/pv_sub_h_div4.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 2 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] +-src2.h[i]; + res.h[i] = temp >> 2; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_sub_h_div8.h b/riscv/insns/pv_sub_h_div8.h new file mode 100644 index 0000000000..027ea66f45 --- /dev/null +++ b/riscv/insns/pv_sub_h_div8.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 3 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] - src2.h[i]; + res.h[i] = temp >> 3; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index c4bc8cb4ce..e77a8f55f2 
100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -867,12 +867,18 @@ riscv_insn_ext_pulpvect = \ pv_add_b \ pv_add_sc_b \ pv_add_sci_b \ + pv_add_h_div2 \ + pv_add_h_div4 \ + pv_add_h_div8 \ pv_sub_h \ pv_sub_sc_h \ pv_sub_sci_h \ pv_sub_b \ pv_sub_sc_b \ pv_sub_sci_b \ + pv_sub_h_div2 \ + pv_sub_h_div4 \ + pv_sub_h_div8 \ pv_avg_h \ pv_avg_sc_h \ pv_avg_sci_h \ @@ -989,6 +995,66 @@ riscv_insn_ext_pulpvect = \ pv_sdotsp_b \ pv_sdotsp_sc_b \ pv_sdotsp_sci_b \ + pv_cmpeq_h \ + pv_cmpeq_sc_h \ + pv_cmpeq_sci_h \ + pv_cmpeq_b \ + pv_cmpeq_sc_b \ + pv_cmpeq_sci_b \ + pv_cmpne_h \ + pv_cmpne_sc_h \ + pv_cmpne_sci_h \ + pv_cmpne_b \ + pv_cmpne_sc_b \ + pv_cmpne_sci_b \ + pv_cmpgt_h \ + pv_cmpgt_sc_h \ + pv_cmpgt_sci_h \ + pv_cmpgt_b \ + pv_cmpgt_sc_b \ + pv_cmpgt_sci_b \ + pv_cmpge_h \ + pv_cmpge_sc_h \ + pv_cmpge_sci_h \ + pv_cmpge_b \ + pv_cmpge_sc_b \ + pv_cmpge_sci_b \ + pv_cmplt_h \ + pv_cmplt_sc_h \ + pv_cmplt_sci_h \ + pv_cmplt_b \ + pv_cmplt_sc_b \ + pv_cmplt_sci_b \ + pv_cmple_h \ + pv_cmple_sc_h \ + pv_cmple_sci_h \ + pv_cmple_b \ + pv_cmple_sc_b \ + pv_cmple_sci_b \ + pv_cmpgtu_h \ + pv_cmpgtu_sc_h \ + pv_cmpgtu_sci_h \ + pv_cmpgtu_b \ + pv_cmpgtu_sc_b \ + pv_cmpgtu_sci_b \ + pv_cmpgeu_h \ + pv_cmpgeu_sc_h \ + pv_cmpgeu_sci_h \ + pv_cmpgeu_b \ + pv_cmpgeu_sc_b \ + pv_cmpgeu_sci_b \ + pv_cmpltu_h \ + pv_cmpltu_sc_h \ + pv_cmpltu_sci_h \ + pv_cmpltu_b \ + pv_cmpltu_sc_b \ + pv_cmpltu_sci_b \ + pv_cmpleu_h \ + pv_cmpleu_sc_h \ + pv_cmpleu_sci_h \ + pv_cmpleu_b \ + pv_cmpleu_sc_b \ + pv_cmpleu_sci_b \ riscv_insn_ext_pulpvectshufflepack = \ pv_shuffle_h \ From f3df5056d612df6590128077243344b66d896382 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Sat, 3 Sep 2022 17:27:40 +0200 Subject: [PATCH 21/23] Implement all instructions in vectcomplex subset - Add disassembly format - Implement all instructions - Add instructions to make This implementation is tested against generated tests. 
--- disasm/disasm.cc | 15 +++++++++++++++ riscv/insns/pv_cplxconj_h.h | 11 +++++++++++ riscv/insns/pv_cplxmul_h_i.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_i_div2.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_i_div4.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_i_div8.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_r.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_r_div2.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_r_div4.h | 16 ++++++++++++++++ riscv/insns/pv_cplxmul_h_r_div8.h | 16 ++++++++++++++++ riscv/insns/pv_subrotmj_h.h | 16 ++++++++++++++++ riscv/insns/pv_subrotmj_h_div2.h | 16 ++++++++++++++++ riscv/insns/pv_subrotmj_h_div4.h | 16 ++++++++++++++++ riscv/insns/pv_subrotmj_h_div8.h | 16 ++++++++++++++++ riscv/riscv.mk.in | 16 ++++++++++++++++ 15 files changed, 234 insertions(+) create mode 100644 riscv/insns/pv_cplxconj_h.h create mode 100644 riscv/insns/pv_cplxmul_h_i.h create mode 100644 riscv/insns/pv_cplxmul_h_i_div2.h create mode 100644 riscv/insns/pv_cplxmul_h_i_div4.h create mode 100644 riscv/insns/pv_cplxmul_h_i_div8.h create mode 100644 riscv/insns/pv_cplxmul_h_r.h create mode 100644 riscv/insns/pv_cplxmul_h_r_div2.h create mode 100644 riscv/insns/pv_cplxmul_h_r_div4.h create mode 100644 riscv/insns/pv_cplxmul_h_r_div8.h create mode 100644 riscv/insns/pv_subrotmj_h.h create mode 100644 riscv/insns/pv_subrotmj_h_div2.h create mode 100644 riscv/insns/pv_subrotmj_h_div4.h create mode 100644 riscv/insns/pv_subrotmj_h_div8.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 094b547bc2..0b95893299 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1676,6 +1676,21 @@ disassembler_t::disassembler_t(int xlen) DEFINE_RTYPE(pv_cmpleu_sc_b); DEFINE_PI1ZTYPE(pv_cmpleu_sci_b); + // xpulpvectcomplex + DEFINE_R1TYPE(pv_cplxconj_h); + DEFINE_RTYPE(pv_subrotmj_h); + DEFINE_RTYPE(pv_subrotmj_h_div2); + DEFINE_RTYPE(pv_subrotmj_h_div4); + DEFINE_RTYPE(pv_subrotmj_h_div8); + DEFINE_RTYPE(pv_cplxmul_h_r); + 
DEFINE_RTYPE(pv_cplxmul_h_r_div2); + DEFINE_RTYPE(pv_cplxmul_h_r_div4); + DEFINE_RTYPE(pv_cplxmul_h_r_div8); + DEFINE_RTYPE(pv_cplxmul_h_i); + DEFINE_RTYPE(pv_cplxmul_h_i_div2); + DEFINE_RTYPE(pv_cplxmul_h_i_div4); + DEFINE_RTYPE(pv_cplxmul_h_i_div8); + // xpulpvectshufflepack DEFINE_RTYPE(pv_shuffle_h); DEFINE_PI1ZTYPE(pv_shuffle_sci_h); diff --git a/riscv/insns/pv_cplxconj_h.h b/riscv/insns/pv_cplxconj_h.h new file mode 100644 index 0000000000..95c91e5bd5 --- /dev/null +++ b/riscv/insns/pv_cplxconj_h.h @@ -0,0 +1,11 @@ +reg_t src1 = RS1; +reg_t res; + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = src1; +int16_t imag = (src1 >> 16); + +imag = -imag; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i.h b/riscv/insns/pv_cplxmul_h_i.h new file mode 100644 index 0000000000..e899c49e78 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 15; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div2.h b/riscv/insns/pv_cplxmul_h_i_div2.h new file mode 100644 index 0000000000..9d477176cd --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 16; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div4.h b/riscv/insns/pv_cplxmul_h_i_div4.h new file mode 100644 
index 0000000000..f23395b070 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 17; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div8.h b/riscv/insns/pv_cplxmul_h_i_div8.h new file mode 100644 index 0000000000..735bf9be2d --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 18; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r.h b/riscv/insns/pv_cplxmul_h_r.h new file mode 100644 index 0000000000..b101bfe153 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 15; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r_div2.h b/riscv/insns/pv_cplxmul_h_r_div2.h new file mode 100644 index 0000000000..3b72aed003 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; 
+int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 16; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r_div4.h b/riscv/insns/pv_cplxmul_h_r_div4.h new file mode 100644 index 0000000000..d5091837ed --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 17; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r_div8.h b/riscv/insns/pv_cplxmul_h_r_div8.h new file mode 100644 index 0000000000..0b503ad84a --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 18; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_subrotmj_h.h b/riscv/insns/pv_subrotmj_h.h new file mode 100644 index 0000000000..04289fcead --- /dev/null +++ b/riscv/insns/pv_subrotmj_h.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = imag1 - imag2; +uint16_t imag = real2 - real1; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div2.h 
b/riscv/insns/pv_subrotmj_h_div2.h new file mode 100644 index 0000000000..3e2d698e3d --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 1; +uint16_t imag = (int16_t)(real2 - real1) >> 1; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div4.h b/riscv/insns/pv_subrotmj_h_div4.h new file mode 100644 index 0000000000..21e980869c --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 2; +uint16_t imag = (int16_t)(real2 - real1) >> 2; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div8.h b/riscv/insns/pv_subrotmj_h_div8.h new file mode 100644 index 0000000000..13babf74db --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 3; +uint16_t imag = (int16_t)(real2 - real1) >> 3; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index e77a8f55f2..d547a1efac 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1056,6 +1056,21 @@ riscv_insn_ext_pulpvect = \ pv_cmpleu_sc_b \ pv_cmpleu_sci_b \ +riscv_insn_ext_pulpvectcomplex = \ + pv_cplxconj_h \ + pv_subrotmj_h \ + pv_subrotmj_h_div2 \ + 
pv_subrotmj_h_div4 \ + pv_subrotmj_h_div8 \ + pv_cplxmul_h_r \ + pv_cplxmul_h_r_div2 \ + pv_cplxmul_h_r_div4 \ + pv_cplxmul_h_r_div8 \ + pv_cplxmul_h_i \ + pv_cplxmul_h_i_div2 \ + pv_cplxmul_h_i_div4 \ + pv_cplxmul_h_i_div8 \ + riscv_insn_ext_pulpvectshufflepack = \ pv_shuffle_h \ pv_shuffle_sci_h \ @@ -1113,6 +1128,7 @@ riscv_insn_ext_pulpimg = \ $(riscv_insn_ext_pulpminmax) \ $(riscv_insn_ext_pulpbitop) \ $(riscv_insn_ext_pulpvect) \ + $(riscv_insn_ext_pulpvectcomplex) \ $(riscv_insn_ext_pulpvectshufflepack) \ $(riscv_insn_ext_pulpclip) \ $(riscv_insn_ext_pulpaddsubrn) \ From cc940b3402a2e1d5c280bc1e33665897e638794b Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Sun, 4 Sep 2022 13:14:31 +0200 Subject: [PATCH 22/23] Fix non-gcc bitop implementations For gcc buitins are used, until now only they were tested. The alternative implementations have now been fixed and tested as well. --- riscv/insns/p_clb.h | 30 +++++++++++++++++++----------- riscv/insns/p_cnt.h | 7 ++++--- riscv/insns/p_ff1.h | 5 +++-- riscv/insns/p_fl1.h | 21 +++++++++++++++------ 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/riscv/insns/p_clb.h b/riscv/insns/p_clb.h index b53d0dc5b9..b124f9f2dc 100644 --- a/riscv/insns/p_clb.h +++ b/riscv/insns/p_clb.h @@ -1,5 +1,5 @@ -sreg_t val = sext_xlen(RS1);; -reg_t cnt; +reg_t val = sext_xlen(RS1);; +reg_t cnt = 0; #ifdef __GNUC__ if(val) { @@ -8,20 +8,28 @@ reg_t cnt; } else { cnt = 0; // rs1 = 0 -> rd = 0, not 32 } -#elif - if(val == 0) { - cnt = 0; // rs1 = 0 -> rd = 0, not 32 - } else { - if(val > 0) { - // turn leading 0s into leading 1s +#else + if(val != 0x00) // rs1 = 0 -> rd = 0, not 32 + { + if(val >= ((reg_t)1 << 31)) { + // turn leading 1s into leading 0s val = ~val; } + val <<= 1; // to distinguish -1 from -2 - // log2() from standfords bithacks (find highest '1') - while (val >>= 1) + // modified log2() from standfords bithacks (find highest '1') + const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + const 
unsigned int S[] = {1, 2, 4, 8, 16}; + for (int i = 4; i >= 0; i--) { - cnt++; + if (val & b[i]) + { + val >>= S[i]; + cnt |= S[i]; + } } + + cnt = 32 - cnt; // pos of MSB+1 to #leading bits } #endif diff --git a/riscv/insns/p_cnt.h b/riscv/insns/p_cnt.h index e3b97e0e1f..7ffc93ad2b 100644 --- a/riscv/insns/p_cnt.h +++ b/riscv/insns/p_cnt.h @@ -1,10 +1,11 @@ reg_t val = zext_xlen(RS1); +reg_t cnt; #ifdef __GNUC__ // "Returns the number of 1-bits in x." - reg_t cnt = __builtin_popcount(val); -#elif - reg_t cnt = 0; + cnt = __builtin_popcount(val); +#else + cnt = 0; for(cnt = 0; val != 0x00; ++cnt) { val &= val - 1; diff --git a/riscv/insns/p_ff1.h b/riscv/insns/p_ff1.h index c4777432fd..8b8a0dc519 100644 --- a/riscv/insns/p_ff1.h +++ b/riscv/insns/p_ff1.h @@ -9,10 +9,11 @@ reg_t cnt; } else { cnt--; } -#elif +#else + // count trailing zero bits from standfords bithacks cnt = 32; val &= -signed(val); - if (val) c--; + if (val) cnt--; if (val & 0x0000FFFF) cnt -= 16; if (val & 0x00FF00FF) cnt -= 8; if (val & 0x0F0F0F0F) cnt -= 4; diff --git a/riscv/insns/p_fl1.h b/riscv/insns/p_fl1.h index 7250991f49..37ee52b37c 100644 --- a/riscv/insns/p_fl1.h +++ b/riscv/insns/p_fl1.h @@ -6,17 +6,26 @@ reg_t cnt; // "Returns the number of leading 0-bits in x" x=0 -> undef cnt = 31 - __builtin_clz(val); } else { + // rs1 = 0 -> rd = 0, not 32 cnt = 32; } -#elif - if(val == 0) { - cnt = 32; // rs1 = 0 -> rd = 0, not 32 - } else { +#else + if(val){ // log2() from standfords bithacks (find highest '1') - while (val >>= 1) + const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + const unsigned int S[] = {1, 2, 4, 8, 16}; + cnt = 0; + for (int i = 4; i >= 0; i--) { - cnt++; + if (val & b[i]) + { + val >>= S[i]; + cnt |= S[i]; + } } + } else { + // rs1 = 0 -> rd = 0, not 32 + cnt = 32; } #endif From cd888eddf3deeb57274fb3031eb2f1b87d0f1276 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Sun, 4 Sep 2022 13:50:39 +0200 Subject: [PATCH 23/23] Update Readme.md to current state 
of project Mention xpulp isa and other useful repos. Add baremetal example and link to more. --- README.md | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 1cc4c24df3..42f19f806c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ Spike, the RISC-V ISA Simulator, implements a functional model of one or more RISC-V harts. It is named after the golden spike used to celebrate the completion of the US transcontinental railway. +This fork extends Spike to support custom PULP instructions. +Together with the repos riscv-opcodes and riscv-tests, it forms a framework that aids in developing extensions, testing implementations and running applications. + Spike supports the following RISC-V ISA features: - RV32I and RV64I base ISAs, v2.1 - Zifencei extension, v2.0 @@ -22,6 +25,7 @@ Spike supports the following RISC-V ISA features: - Conformance to both RVWMO and RVTSO (Spike is sequentially consistent) - Machine, Supervisor, and User modes, v1.11 - Debug v0.14 + - All xpulpv3 extension subsets except xpulpelw Versioning and APIs ------------------- @@ -74,30 +78,49 @@ Install spike (see Build Steps), riscv-gnu-toolchain, and riscv-pk. Write a short C program and name it hello.c. Then, compile it into a RISC-V ELF binary named hello: - $ riscv64-unknown-elf-gcc -o hello hello.c + $ riscv32-unknown-elf-gcc -o hello hello.c Now you can simulate the program atop the proxy kernel: $ spike pk hello +Or on bare metal: + +``` +$ spike hello +``` + +[jonesinator/riscv-spike-minimal-assembly](https://github.com/jonesinator/riscv-spike-minimal-assembly) provides a well documented minimal bare metal program and also one which uses syscall to communicate with the host. + +For xpulp-specific examples take a look at the riscv-tests repo, in riscv-tests/isa it contains functional tests for all supported xpulp instructions. 
+ Simulating a New Instruction ------------------------------------ -Adding an instruction to the simulator requires two steps: +Adding an instruction to the simulator requires these steps: + + 1. Clone riscv-opcodes, add the opcode to it and generate encoding_out.h + + 2. Create a soft-link for riscv/encoding.h to the generated encoding_out.h + + ``` + $ ln -sfr riscv-opcodes/encoding_out.h riscv-isa-sim/riscv/encoding.h + ``` + + 3. Describe the instruction's functional behavior in the file + riscv/insns/<new_instruction_name>.h. Examine other instructions + in that directory as a starting point. Use macros from riscv/decode.h. + + 4. Add the mnemonic format (disassembly format) of the instruction to disasm/disasm.cc - 1. Describe the instruction's functional behavior in the file - riscv/insns/<new_instruction_name>.h. Examine other instructions - in that directory as a starting point. + 5. In riscv/riscv.mk.in add the instruction to riscv_insn_list. + You can get all instructions from your current encoding.h (encoding_out.h) using: - 2. Add the opcode and opcode mask to riscv/opcodes.h. Alternatively, - add it to the riscv-opcodes package, and it will do so for you: - ``` - $ cd ../riscv-opcodes - $ vi opcodes // add a line for the new instruction - $ make install - ``` + ``` + $ grep ^DECLARE_INSN encoding.h | sed 's/DECLARE_INSN(\(.*\),.*,.*)/\1/' + ``` - 3. Rebuild the simulator. + 6. Rebuild the simulator. Interactive Debug Mode ---------------------------