diff --git a/ChangeLog.md b/ChangeLog.md new file mode 100644 index 0000000000..2be2d4adbe --- /dev/null +++ b/ChangeLog.md @@ -0,0 +1,21 @@ +Version 1.0.1-dev +----------------- +- Preliminary support for a subset of the Vector Extension, v0.7.1. +- Support S-mode vectored interrupts (i.e. `stvec[0]` is now writable). +- Added support for dynamic linking of libraries containing MMIO devices. +- Added `--priv` flag to control which privilege modes are available. +- When the commit log is enabled at configure time (`--enable-commitlog`), + it must also be enabled at runtime with the `--log-commits` option. +- Several debug-related additions and changes: + - Added `hasel` debug feature. + - Added `--dm-no-abstract-csr` command-line option. + - Added `--dm-no-halt-groups` command line option. + - Renamed `--progsize` to `--dm-progsize`. + - Renamed `--debug-sba` to `--dm-sba`. + - Renamed `--debug-auth` to `--dm-auth`. + - Renamed `--abstract-rti` to `--dm-abstract-rti`. + - Renamed `--without-hasel` to `--dm-no-hasel`. + +Version 1.0.0 (2019-03-30) +-------------------------- +- First versioned release. diff --git a/Makefile.in b/Makefile.in index c09fc50588..66e8df08c5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -36,6 +36,9 @@ project_name := @PACKAGE_TARNAME@ src_dir := @srcdir@ scripts_dir := $(src_dir)/scripts +HAVE_INT128 := @HAVE_INT128@ +HAVE_DLOPEN := @HAVE_DLOPEN@ + # If the version information is not in the configure script, then we # assume that we are in a working directory. 
We use the vcs-version.sh # script in the scripts directory to generate an appropriate version @@ -50,17 +53,11 @@ endif # Installation directories -prefix := @prefix@ -enable_stow := @enable_stow@ +prefix ?= @prefix@ -ifeq ($(enable_stow),yes) - stow_pkg_dir := $(prefix)/pkgs - INSTALLDIR ?= $(DESTDIR)$(stow_pkg_dir)/$(project_name)-$(project_ver) -else - INSTALLDIR ?= $(DESTDIR)$(prefix) -endif +INSTALLDIR ?= $(DESTDIR)$(prefix) -install_hdrs_dir := $(INSTALLDIR)/include/$(project_name) +install_hdrs_dir := $(INSTALLDIR)/include install_libs_dir := $(INSTALLDIR)/lib install_exes_dir := $(INSTALLDIR)/bin @@ -81,25 +78,52 @@ VPATH := $(addprefix $(src_dir)/, $(sprojs_enabled)) # C++ compiler # - CPPFLAGS : flags for the preprocessor (eg. -I,-D) # - CXXFLAGS : flags for C++ compiler (eg. -Wall,-g,-O3) +# +# To allow a user to specify CFLAGS or similar as part of the Make +# command, we also have mcppbs-CFLAGS etc. with stuff that shouldn't be +# lost in such a case. +# +# The order of precedence (highest to lowest) is then: +# +# - Specified as part of Make command line +# - Specified as part of running configure +# - Specified here (default-CFLAGS) +# +# These all appear on the command line, from lowest precedence to +# highest. + +default-CFLAGS := -DPREFIX=\"$(prefix)\" -Wall -Wno-unused -g -O2 +default-CXXFLAGS := $(default-CFLAGS) -std=c++11 + +mcppbs-CPPFLAGS := @CPPFLAGS@ +mcppbs-CFLAGS := $(default-CFLAGS) @CFLAGS@ +mcppbs-CXXFLAGS := $(default-CXXFLAGS) @CXXFLAGS@ CC := @CC@ CXX := @CXX@ -CFLAGS += @CFLAGS@ -DPREFIX=\"$(prefix)\" -CPPFLAGS += @CPPFLAGS@ -CXXFLAGS += @CXXFLAGS@ -DPREFIX=\"$(prefix)\" -COMPILE := $(CXX) -fPIC -MMD -MP $(CPPFLAGS) $(CXXFLAGS) \ - $(sprojs_include) -COMPILE_C := $(CC) -fPIC -MMD -MP $(CPPFLAGS) $(CFLAGS) \ - $(sprojs_include) + +# These are the flags actually used for a C++ compile or a C compile. +# The language-specific flags come after the preprocessor flags, but +# user-supplied flags always take precedence.
+all-cxx-flags := \ + $(mcppbs-CPPFLAGS) $(mcppbs-CXXFLAGS) $(CPPFLAGS) $(CXXFLAGS) +all-c-flags := \ + $(mcppbs-CPPFLAGS) $(mcppbs-CFLAGS) $(CPPFLAGS) $(CFLAGS) + +COMPILE := $(CXX) -MMD -MP $(all-cxx-flags) $(sprojs_include) +COMPILE_C := $(CC) -MMD -MP $(all-c-flags) $(sprojs_include) + # Linker # - LDFLAGS : Flags for the linker (eg. -L) # - LIBS : Library flags (eg. -l) +mcppbs-LDFLAGS := @LDFLAGS@ +all-link-flags := $(mcppbs-LDFLAGS) $(LDFLAGS) + comma := , LD := $(CXX) -LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ -LINK := $(LD) -L. $(LDFLAGS) -Wl,-rpath,$(install_libs_dir) $(patsubst -L%,-Wl$(comma)-rpath$(comma)%,$(filter -L%,$(LDFLAGS))) +LINK := $(LD) -L. $(all-link-flags) -Wl,-rpath,$(install_libs_dir) $(patsubst -L%,-Wl$(comma)-rpath$(comma)%,$(filter -L%,$(LDFLAGS))) # Library creation @@ -115,9 +139,9 @@ RUNFLAGS := @RUNFLAGS@ MKINSTALLDIRS := $(scripts_dir)/mk-install-dirs.sh INSTALL := @INSTALL@ -INSTALL_HDR := $(INSTALL) -m 444 +INSTALL_HDR := $(INSTALL) -m 644 INSTALL_LIB := $(INSTALL) -m 644 -INSTALL_EXE := $(INSTALL) -m 555 +INSTALL_EXE := $(INSTALL) -m 755 STOW := @stow@ # Tests @@ -194,12 +218,12 @@ $(2)_deps := $$(patsubst %.o, %.d, $$($(2)_objs)) $(2)_deps += $$(patsubst %.o, %.d, $$($(2)_c_objs)) $(2)_deps += $$(patsubst %.h, %.h.d, $$($(2)_precompiled_hdrs)) $$($(2)_pch) : %.h.gch : %.h - $(COMPILE) -x c++-header $$< -o $$@ + $(COMPILE) -x c++-header -c $$< -o $$@ # If using clang, don't depend (and thus don't build) precompiled headers $$($(2)_objs) : %.o : %.cc $$($(2)_gen_hdrs) $(if $(filter-out clang,$(CC)),$$($(2)_pch)) - $(COMPILE) -c $$< + $(COMPILE) $$($(2)_CFLAGS) -c $$< $$($(2)_c_objs) : %.o : %.c $$($(2)_gen_hdrs) - $(COMPILE_C) -c $$< + $(COMPILE_C) $$($(2)_CFLAGS) -c $$< $(2)_junk += $$($(2)_pch) $$($(2)_objs) $$($(2)_c_objs) $$($(2)_deps) \ $$($(2)_gen_hdrs) @@ -213,13 +237,17 @@ $(2)_reverse_deps := $$(call reverse_list,$$($(2)_subproject_deps)) # Build a library for this subproject $(2)_lib_libs := $$($(2)_reverse_deps) 
-$(2)_lib_libnames := $$(patsubst %, lib%.so, $$($(2)_lib_libs)) +$(2)_lib_libnames := $$(patsubst %, lib%.a, $$($(2)_lib_libs)) $(2)_lib_libarg := $$(patsubst %, -l%, $$($(2)_lib_libs)) +$(2)_lib_libnames_shared := $$(if $$($(2)_install_shared_lib),lib$(1).so,) -lib$(1).so : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames) - $(LINK) -shared -o $$@ $(if $(filter Darwin,$(shell uname -s)),-install_name $(install_libs_dir)/$$@) $$^ $$($(2)_lib_libarg) $(LIBS) +lib$(1).a : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames) + $(AR) rcs $$@ $$^ +lib$(1).so : $$($(2)_objs) $$($(2)_c_objs) $$($(2)_lib_libnames_shared) $$($(2)_lib_libnames) + $(LINK) -shared -o $$@ $(if $(filter Darwin,$(shell uname -s)),-install_name $(install_libs_dir)/$$@) $$^ $$($(2)_lib_libnames) $(LIBS) -$(2)_junk += lib$(1).so +$(2)_junk += lib$(1).a +$(2)_junk += $$(if $$($(2)_install_shared_lib),lib$(1).so,) # Build unit tests @@ -228,14 +256,14 @@ $(2)_test_deps := $$(patsubst %.o, %.d, $$($(2)_test_objs)) $(2)_test_exes := $$(patsubst %.t.cc, %-utst, $$($(2)_test_srcs)) $(2)_test_outs := $$(patsubst %, %.out, $$($(2)_test_exes)) $(2)_test_libs := $(1) $$($(2)_reverse_deps) utst -$(2)_test_libnames := $$(patsubst %, lib%.so, $$($(2)_test_libs)) +$(2)_test_libnames := $$(patsubst %, lib%.a, $$($(2)_test_libs)) $(2)_test_libarg := $$(patsubst %, -l%, $$($(2)_test_libs)) $$($(2)_test_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_test_exes) : %-utst : %.t.o $$($(2)_test_libnames) - $(LINK) -o $$@ $$< $$($(2)_test_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_test_libnames) $(LIBS) $(2)_deps += $$($(2)_test_deps) $(2)_junk += \ @@ -255,14 +283,14 @@ $(2)_prog_objs := $$(patsubst %.cc, %.o, $$($(2)_prog_srcs)) $(2)_prog_deps := $$(patsubst %.o, %.d, $$($(2)_prog_objs)) $(2)_prog_exes := $$(patsubst %.cc, %, $$($(2)_prog_srcs)) $(2)_prog_libs := $(1) $$($(2)_reverse_deps) -$(2)_prog_libnames := $$(patsubst %, lib%.so, $$($(2)_prog_libs)) +$(2)_prog_libnames := $$(patsubst %, lib%.a, 
$$($(2)_prog_libs)) $(2)_prog_libarg := $$(patsubst %, -l%, $$($(2)_prog_libs)) $$($(2)_prog_objs) : %.o : %.cc $(COMPILE) -c $$< $$($(2)_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_prog_deps) $(2)_junk += $$($(2)_prog_objs) $$($(2)_prog_deps) $$($(2)_prog_exes) @@ -277,7 +305,7 @@ $$($(2)_install_prog_objs) : %.o : %.cc $$($(2)_gen_hdrs) $(COMPILE) -c $$< $$($(2)_install_prog_exes) : % : %.o $$($(2)_prog_libnames) - $(LINK) -o $$@ $$< $$($(2)_prog_libarg) $(LIBS) + $(LINK) -o $$@ $$< $$($(2)_prog_libnames) $(LIBS) $(2)_deps += $$($(2)_install_prog_deps) $(2)_junk += \ @@ -286,7 +314,7 @@ $(2)_junk += \ # Subproject specific targets -all-$(1) : lib$(1).so $$($(2)_install_prog_exes) +all-$(1) : lib$(1).a $$($(2)_install_prog_exes) check-$(1) : $$($(2)_test_outs) echo; grep -h -e'Unit Tests' -e'FAILED' -e'Segementation' $$^; echo @@ -298,7 +326,7 @@ clean-$(1) : # Update running variables -libs += lib$(1).so +libs += lib$(1).a objs += $$($(2)_objs) srcs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_srcs)) hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_hdrs)) $$($(2)_gen_hdrs) @@ -307,10 +335,11 @@ deps += $$($(2)_deps) test_outs += $$($(2)_test_outs) -install_hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_hdrs)) $$($(2)_gen_hdrs) -install_libs += lib$(1).so +install_hdrs += $$(addprefix $(src_dir)/$(1)/, $$($(2)_install_hdrs)) +install_libs += $$(if $$($(2)_install_lib),lib$(1).a,) +install_libs += $$(if $$($(2)_install_shared_lib),lib$(1).so,) install_exes += $$($(2)_install_prog_exes) -install_pcs += riscv-$(1).pc +install_pcs += $$(if $$($(2)_install_lib),riscv-$(1).pc,) endef @@ -353,11 +382,12 @@ check : check-cpp check-bin # Installation #------------------------------------------------------------------------- -install-hdrs : $(install_hdrs) config.h +install-hdrs : $(install_hdrs) $(MKINSTALLDIRS) $(install_hdrs_dir) - for file in $^; \ + for 
file in $(subst $(src_dir)/,,$^); \ do \ - $(INSTALL_HDR) $$file $(install_hdrs_dir); \ + $(MKINSTALLDIRS) $(install_hdrs_dir)/`dirname $$file`; \ + $(INSTALL_HDR) $(src_dir)/$$file $(install_hdrs_dir)/`dirname $$file`; \ done install-libs : $(install_libs) @@ -382,12 +412,6 @@ install-pc : $(install_pcs) done install : install-hdrs install-libs install-exes install-pc -ifeq ($(enable_stow),yes) - $(MKINSTALLDIRS) $(stow_pkg_dir) - cd $(stow_pkg_dir) && \ - $(STOW) --delete $(project_name)-* && \ - $(STOW) $(project_name)-$(project_ver) -endif .PHONY : install install-hdrs install-libs install-exes diff --git a/README.md b/README.md index 018c7d3ead..42f19f806c 100644 --- a/README.md +++ b/README.md @@ -5,24 +5,71 @@ About ------------- Spike, the RISC-V ISA Simulator, implements a functional model of one or more -RISC-V processors. - -Spike is named after the golden spike used to celebrate the completion of the -US transcontinental railway. +RISC-V harts. It is named after the golden spike used to celebrate the +completion of the US transcontinental railway. + +This fork extends Spike to support custom PULP instructions. +Together with the repos riscv-opcodes and riscv-tests, it forms a framework that aids in developing extensions, testing implementations and running applications. 
+ +Spike supports the following RISC-V ISA features: + - RV32I and RV64I base ISAs, v2.1 + - Zifencei extension, v2.0 + - Zicsr extension, v2.0 + - M extension, v2.0 + - A extension, v2.1 + - F extension, v2.2 + - D extension, v2.2 + - Q extension, v2.2 + - C extension, v2.0 + - V extension, v0.9, w/ Zvlsseg/Zvamo/Zvqmac, w/o Zvediv, (_requires a 64-bit host_) + - Conformance to both RVWMO and RVTSO (Spike is sequentially consistent) + - Machine, Supervisor, and User modes, v1.11 + - Debug v0.14 + - All xpulpv3 extension subsets except xpulpelw + +Versioning and APIs +------------------- + +Projects are versioned primarily to indicate when the API has been extended or +rendered incompatible. In that spirit, Spike aims to follow the +[SemVer](https://semver.org/spec/v2.0.0.html) versioning scheme, in which +major version numbers are incremented when backwards-incompatible API changes +are made; minor version numbers are incremented when new APIs are added; and +patch version numbers are incremented when bugs are fixed in +a backwards-compatible manner. + +Spike's principal public API is the RISC-V ISA. _The C++ interface to Spike's +internals is **not** considered a public API at this time_, and +backwards-incompatible changes to this interface _will_ be made without +incrementing the major version number. Build Steps --------------- We assume that the RISCV environment variable is set to the RISC-V tools -install path, and that the riscv-fesvr package is installed there. +install path. $ apt-get install device-tree-compiler $ mkdir build $ cd build - $ ../configure --prefix=$RISCV --with-fesvr=$RISCV + $ ../configure --prefix=$RISCV $ make $ [sudo] make install +Build Steps on OpenBSD +---------------------- + +Install bash, gmake, dtc, and use clang. 
+ + $ pkg_add bash gmake dtc + $ exec bash + $ export CC=cc; export CXX=c++ + $ mkdir build + $ cd build + $ ../configure --prefix=$RISCV + $ gmake + $ [doas] gmake install + Compiling and Running a Simple C Program ------------------------------------------- @@ -31,29 +78,49 @@ Install spike (see Build Steps), riscv-gnu-toolchain, and riscv-pk. Write a short C program and name it hello.c. Then, compile it into a RISC-V ELF binary named hello: - $ riscv64-unknown-elf-gcc -o hello hello.c + $ riscv32-unknown-elf-gcc -o hello hello.c Now you can simulate the program atop the proxy kernel: $ spike pk hello +Or on bare metal: + +``` +$ spike hello +``` + +[jonesinator/riscv-spike-minimal-assembly](https://github.com/jonesinator/riscv-spike-minimal-assembly) provides a well-documented minimal bare-metal program and also one which uses syscall to communicate with the host. + +For xpulp-specific examples, take a look at the riscv-tests repo; its riscv-tests/isa directory contains functional tests for all supported xpulp instructions. + Simulating a New Instruction ------------------------------------ -Adding an instruction to the simulator requires two steps: +Adding an instruction to the simulator requires these steps: + + 1. Clone riscv-opcodes, add the opcode to it and generate encoding_out.h + + 2. Create a soft-link for riscv/encoding.h to the generated encoding_out.h + + ``` + $ ln -sfr riscv-opcodes/encoding_out.h riscv-isa-sim/riscv/encoding.h + ``` + + 3. Describe the instruction's functional behavior in the file + riscv/insns/<new_instruction_name>.h. Examine other instructions + in that directory as a starting point. Use macros from riscv/decode.h. - 1. Describe the instruction's functional behavior in the file - riscv/insns/<new_instruction_name>.h. Examine other instructions - in that directory as a starting point. + 4. Add the mnemonic format (disassembly format) of the instruction to disasm/disasm.cc - 2. Add the opcode and opcode mask to riscv/opcodes.h.
Alternatively, - add it to the riscv-opcodes package, and it will do so for you: + 5. In riscv/riscv.mk.in add the instruction to riscv_insn_list. + You can get all instructions from your current encoding.h (encoding_out.h) using: - $ cd ../riscv-opcodes - $ vi opcodes // add a line for the new instruction - $ make install + ``` + $ grep ^DECLARE_INSN encoding.h | sed 's/DECLARE_INSN(\(.*\),.*,.*)/\1/' + ``` - 3. Rebuild the simulator. + 6. Rebuild the simulator. Interactive Debug Mode --------------------------- @@ -84,7 +151,7 @@ To see the contents of memory with a virtual address (0 for core 0): : mem 0 2020 -You can advance by one instruction by pressing . You can also +You can advance by one instruction by pressing the enter key. You can also execute until a desired equality is reached: : until pc 0 2020 (stop when pc=2020) @@ -136,6 +203,7 @@ int main() i++; } +done: while (!wait) ; } @@ -184,8 +252,8 @@ riscv.cpu: target state: halted In yet another shell, start your gdb debug session: ``` tnewsome@compy-vm:~/SiFive/spike-test$ riscv64-unknown-elf-gdb rot13-64 -GNU gdb (GDB) 7.12.50.20170505-git -Copyright (C) 2016 Free Software Foundation, Inc. +GNU gdb (GDB) 8.0.50.20170724-git +Copyright (C) 2017 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" @@ -201,21 +269,22 @@ Type "apropos word" to search for commands related to "word"... Reading symbols from rot13-64...done. (gdb) target remote localhost:3333 Remote debugging using localhost:3333 -0x000000001001000a in main () at rot13.c:8 -8 while (wait) +0x0000000010010004 in main () at rot13.c:8 +8 while (wait) (gdb) print wait $1 = 1 (gdb) print wait=0 $2 = 0 (gdb) print text $3 = "Vafgehpgvba frgf jnag gb or serr!" -(gdb) b 23 -Breakpoint 1 at 0x10010064: file rot13.c, line 23. +(gdb) b done +Breakpoint 1 at 0x10010064: file rot13.c, line 22. 
(gdb) c Continuing. +Disabling abstract command writes to CSRs. Breakpoint 1, main () at rot13.c:23 -23 while (!wait) +23 while (!wait) (gdb) print wait $4 = 0 (gdb) print text diff --git a/VERSION b/VERSION new file mode 100644 index 0000000000..3af5f50aff --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +#define SPIKE_VERSION "1.0.1-dev" diff --git a/aclocal.m4 b/aclocal.m4 index 15353f2c95..def74dbadf 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -59,49 +59,6 @@ AC_DEFUN([MCPPBS_PROG_INSTALL], # Check for install script AC_PROG_INSTALL - - # Deterimine if native build and set prefix appropriately - - AS_IF([ test ${enable_stow} = "yes" ], - [ - AC_CHECK_PROGS([stow],[stow],[no]) - AS_IF([ test ${stow} = "no" ], - [ - AC_MSG_ERROR([Cannot use --enable-stow since stow is not available]) - ]) - - # Check if native or non-native build - - AS_IF([ test "${build}" = "${host}" ], - [ - - # build == host so this is a native build. Make sure --prefix not - # set and $STOW_PREFIX is set, then set prefix=$STOW_PREFIX. - - AS_IF([ test "${prefix}" = "NONE" && test -n "${STOW_PREFIX}" ], - [ - prefix="${STOW_PREFIX}" - AC_MSG_NOTICE([Using \$STOW_PREFIX from environment]) - AC_MSG_NOTICE([prefix=${prefix}]) - ]) - - ],[ - - # build != host so this is a non-native build. Make sure --prefix - # not set and $STOW_ROOT is set, then set - # prefix=$STOW_ROOT/${host_alias}. 
- - AS_IF([ test "${prefix}" = "NONE" && test -n "${STOW_ROOT}" ], - [ - prefix="${STOW_ROOT}/${host_alias}" - AC_MSG_NOTICE([Using \$STOW_ROOT from environment]) - AC_MSG_NOTICE([prefix=${prefix}]) - ]) - - ]) - - ]) - ]) #------------------------------------------------------------------------- diff --git a/ax_append_flag.m4 b/ax_append_flag.m4 new file mode 100644 index 0000000000..dd6d8b6140 --- /dev/null +++ b/ax_append_flag.m4 @@ -0,0 +1,50 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_FLAG(FLAG, [FLAGS-VARIABLE]) +# +# DESCRIPTION +# +# FLAG is appended to the FLAGS-VARIABLE shell variable, with a space +# added in between. +# +# If FLAGS-VARIABLE is not specified, the current language's flags (e.g. +# CFLAGS) is used. FLAGS-VARIABLE is not changed if it already contains +# FLAG. If FLAGS-VARIABLE is unset in the shell, it is set to exactly +# FLAG. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 8 + +AC_DEFUN([AX_APPEND_FLAG], +[dnl +AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_SET_IF +AS_VAR_PUSHDEF([FLAGS], [m4_default($2,_AC_LANG_PREFIX[FLAGS])]) +AS_VAR_SET_IF(FLAGS,[ + AS_CASE([" AS_VAR_GET(FLAGS) "], + [*" $1 "*], [AC_RUN_LOG([: FLAGS already contains $1])], + [ + AS_VAR_APPEND(FLAGS,[" $1"]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) + ], + [ + AS_VAR_SET(FLAGS,[$1]) + AC_RUN_LOG([: FLAGS="$FLAGS"]) + ]) +AS_VAR_POPDEF([FLAGS])dnl +])dnl AX_APPEND_FLAG diff --git a/ax_append_link_flags.m4 b/ax_append_link_flags.m4 new file mode 100644 index 0000000000..99b9fa5b4e --- /dev/null +++ b/ax_append_link_flags.m4 @@ -0,0 +1,44 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_link_flags.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_LINK_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# For every FLAG1, FLAG2 it is checked whether the linker works with the +# flag. If it does, the flag is added FLAGS-VARIABLE +# +# If FLAGS-VARIABLE is not specified, the linker's flags (LDFLAGS) is +# used. During the check the flag is always added to the linker's flags. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: This macro depends on the AX_APPEND_FLAG and AX_CHECK_LINK_FLAG. +# Please keep this macro in sync with AX_APPEND_COMPILE_FLAGS. 
+# +# LICENSE +# +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 7 + +AC_DEFUN([AX_APPEND_LINK_FLAGS], +[AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +AX_REQUIRE_DEFINED([AX_APPEND_FLAG]) +for flag in $1; do + AX_CHECK_LINK_FLAG([$flag], [AX_APPEND_FLAG([$flag], [m4_default([$2], [LDFLAGS])])], [], [$3], [$4]) +done +])dnl AX_APPEND_LINK_FLAGS diff --git a/ax_check_link_flag.m4 b/ax_check_link_flag.m4 new file mode 100644 index 0000000000..03a30ce4c7 --- /dev/null +++ b/ax_check_link_flag.m4 @@ -0,0 +1,53 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_link_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_LINK_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the linker or gives an error. +# (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_LINK_IFELSE. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,COMPILE}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. 
Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 6 + +AC_DEFUN([AX_CHECK_LINK_FLAG], +[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_ldflags_$4_$1])dnl +AC_CACHE_CHECK([whether the linker accepts $1], CACHEVAR, [ + ax_check_save_flags=$LDFLAGS + LDFLAGS="$LDFLAGS $4 $1" + AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + LDFLAGS=$ax_check_save_flags]) +AS_VAR_IF(CACHEVAR,yes, + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_LINK_FLAGS diff --git a/ax_require_defined.m4 b/ax_require_defined.m4 new file mode 100644 index 0000000000..17c3eab7da --- /dev/null +++ b/ax_require_defined.m4 @@ -0,0 +1,37 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_require_defined.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_REQUIRE_DEFINED(MACRO) +# +# DESCRIPTION +# +# AX_REQUIRE_DEFINED is a simple helper for making sure other macros have +# been defined and thus are available for use. This avoids random issues +# where a macro isn't expanded. Instead the configure script emits a +# non-fatal: +# +# ./configure: line 1673: AX_CFLAGS_WARN_ALL: command not found +# +# It's like AC_REQUIRE except it doesn't expand the required macro. 
+# +# Here's an example: +# +# AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +# +# LICENSE +# +# Copyright (c) 2014 Mike Frysinger +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 2 + +AC_DEFUN([AX_REQUIRE_DEFINED], [dnl + m4_ifndef([$1], [m4_fatal([macro ]$1[ is not defined; is a m4 file missing?])]) +])dnl AX_REQUIRE_DEFINED diff --git a/ci-tests/test-spike b/ci-tests/test-spike new file mode 100755 index 0000000000..3d5ed6d79a --- /dev/null +++ b/ci-tests/test-spike @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +mkdir build +cd build +mkdir install +$DIR/../configure --prefix=`pwd`/install +make -j4 +make install diff --git a/config.h.in b/config.h.in index 137f195005..f5bbab1325 100644 --- a/config.h.in +++ b/config.h.in @@ -3,21 +3,33 @@ /* Define if building universal (internal helper macro) */ #undef AC_APPLE_UNIVERSAL_BUILD +/* Define if subproject MCPPBS_SPROJ_NORM is enabled */ +#undef CUSTOMEXT_ENABLED + /* Default value for --isa switch */ #undef DEFAULT_ISA -/* Path to the device-tree-compiler */ +/* Default value for --priv switch */ +#undef DEFAULT_PRIV + +/* Default value for --varch switch */ +#undef DEFAULT_VARCH + +/* Executable name of device-tree-compiler */ #undef DTC /* Define if subproject MCPPBS_SPROJ_NORM is enabled */ -#undef DUMMY_ROCC_ENABLED +#undef FDT_ENABLED + +/* Define if subproject MCPPBS_SPROJ_NORM is enabled */ +#undef FESVR_ENABLED + +/* Dynamic library loading is supported */ +#undef HAVE_DLOPEN /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H -/* Define to 1 if you have the `fesvr' library (-lfesvr). */ -#undef HAVE_LIBFESVR - /* Define to 1 if you have the `pthread' library (-lpthread). 
*/ #undef HAVE_LIBPTHREAD diff --git a/configure b/configure index 015f63e965..7b9da7e86d 100755 --- a/configure +++ b/configure @@ -626,7 +626,8 @@ ac_subst_vars='LTLIBOBJS LIBOBJS subprojects_enabled subprojects -stow +HAVE_DLOPEN +HAVE_INT128 INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM @@ -702,7 +703,8 @@ enable_option_checking enable_stow enable_optional_subprojects with_isa -with_fesvr +with_priv +with_varch enable_commitlog enable_histogram enable_dirty @@ -1360,8 +1362,9 @@ Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-isa=RV64IMAFDC Sets the default RISC-V ISA - --with-fesvr path to your fesvr installation if not in a standard - location + --with-priv=MSU Sets the default RISC-V privilege modes supported + --with-varch=vlen:128,elen:64,slen:128 + Sets the default vector config Some influential environment variables: CC C compiler command @@ -1643,6 +1646,60 @@ $as_echo "$ac_res" >&6; } } # ac_fn_cxx_check_header_compile +# ac_fn_cxx_check_type LINENO TYPE VAR INCLUDES +# --------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_cxx_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_cxx_check_type + # ac_fn_cxx_try_link LINENO # ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. @@ -2148,6 +2205,152 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_require_defined.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_REQUIRE_DEFINED(MACRO) +# +# DESCRIPTION +# +# AX_REQUIRE_DEFINED is a simple helper for making sure other macros have +# been defined and thus are available for use. This avoids random issues +# where a macro isn't expanded. Instead the configure script emits a +# non-fatal: +# +# ./configure: line 1673: AX_CFLAGS_WARN_ALL: command not found +# +# It's like AC_REQUIRE except it doesn't expand the required macro. +# +# Here's an example: +# +# AX_REQUIRE_DEFINED([AX_CHECK_LINK_FLAG]) +# +# LICENSE +# +# Copyright (c) 2014 Mike Frysinger +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 2 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_FLAG(FLAG, [FLAGS-VARIABLE]) +# +# DESCRIPTION +# +# FLAG is appended to the FLAGS-VARIABLE shell variable, with a space +# added in between. +# +# If FLAGS-VARIABLE is not specified, the current language's flags (e.g. +# CFLAGS) is used. FLAGS-VARIABLE is not changed if it already contains +# FLAG. If FLAGS-VARIABLE is unset in the shell, it is set to exactly +# FLAG. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 8 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_check_link_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_LINK_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the linker or gives an error. +# (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_LINK_IFELSE. 
+# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,COMPILE}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 6 + + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_append_link_flags.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_APPEND_LINK_FLAGS([FLAG1 FLAG2 ...], [FLAGS-VARIABLE], [EXTRA-FLAGS], [INPUT]) +# +# DESCRIPTION +# +# For every FLAG1, FLAG2 it is checked whether the linker works with the +# flag. If it does, the flag is added FLAGS-VARIABLE +# +# If FLAGS-VARIABLE is not specified, the linker's flags (LDFLAGS) is +# used. During the check the flag is always added to the linker's flags. +# +# If EXTRA-FLAGS is defined, it is added to the linker's default flags +# when the check is done. The check is thus made with the flags: "LDFLAGS +# EXTRA-FLAGS FLAG". This can for example be used to force the linker to +# issue an error when a bad flag is given. +# +# INPUT gives an alternative input source to AC_COMPILE_IFELSE. +# +# NOTE: This macro depends on the AX_APPEND_FLAG and AX_CHECK_LINK_FLAG. +# Please keep this macro in sync with AX_APPEND_COMPILE_FLAGS. +# +# LICENSE +# +# Copyright (c) 2011 Maarten Bosmans +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 7 + + + #------------------------------------------------------------------------- # Checks for programs #------------------------------------------------------------------------- @@ -3428,7 +3631,7 @@ if test x"$DTC" == xno; then : fi cat >>confdefs.h <<_ACEOF -#define DTC "$DTC" +#define DTC "dtc" _ACEOF @@ -4036,7 +4239,8 @@ fi $as_echo "$ac_cv_c_bigendian" >&6; } case $ac_cv_c_bigendian in #( yes) - as_fn_error $? "Spike requires a little-endian host" "$LINENO" 5;; #( + $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h +;; #( no) ;; #( universal) @@ -4173,102 +4377,6 @@ fi - # Deterimine if native build and set prefix appropriately - - if test ${enable_stow} = "yes" ; then : - - for ac_prog in stow -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_stow+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$stow"; then - ac_cv_prog_stow="$stow" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_stow="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -stow=$ac_cv_prog_stow -if test -n "$stow"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $stow" >&5 -$as_echo "$stow" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$stow" && break -done -test -n "$stow" || stow="no" - - if test ${stow} = "no" ; then : - - as_fn_error $? 
"Cannot use --enable-stow since stow is not available" "$LINENO" 5 - -fi - - # Check if native or non-native build - - if test "${build}" = "${host}" ; then : - - - # build == host so this is a native build. Make sure --prefix not - # set and $STOW_PREFIX is set, then set prefix=$STOW_PREFIX. - - if test "${prefix}" = "NONE" && test -n "${STOW_PREFIX}" ; then : - - prefix="${STOW_PREFIX}" - { $as_echo "$as_me:${as_lineno-$LINENO}: Using \$STOW_PREFIX from environment" >&5 -$as_echo "$as_me: Using \$STOW_PREFIX from environment" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: prefix=${prefix}" >&5 -$as_echo "$as_me: prefix=${prefix}" >&6;} - -fi - - -else - - - # build != host so this is a non-native build. Make sure --prefix - # not set and $STOW_ROOT is set, then set - # prefix=$STOW_ROOT/${host_alias}. - - if test "${prefix}" = "NONE" && test -n "${STOW_ROOT}" ; then : - - prefix="${STOW_ROOT}/${host_alias}" - { $as_echo "$as_me:${as_lineno-$LINENO}: Using \$STOW_ROOT from environment" >&5 -$as_echo "$as_me: Using \$STOW_ROOT from environment" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: prefix=${prefix}" >&5 -$as_echo "$as_me: prefix=${prefix}" >&6;} - -fi - - -fi - - -fi - - #------------------------------------------------------------------------- # Checks for header files @@ -4387,13 +4495,96 @@ $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi +#------------------------------------------------------------------------- +# Checks for type +#------------------------------------------------------------------------- + +ac_fn_cxx_check_type "$LINENO" "__int128_t" "ac_cv_type___int128_t" "$ac_includes_default" +if test "x$ac_cv_type___int128_t" = xyes; then : + HAVE_INT128=yes + +fi + + #------------------------------------------------------------------------- # Default compiler flags #------------------------------------------------------------------------- -CFLAGS="-Wall -Wno-unused -g -O2" -CXXFLAGS="-Wall -Wno-unused -g -O2 -std=c++11" + + + +for flag in 
-Wl,--export-dynamic; do + as_CACHEVAR=`$as_echo "ax_cv_check_ldflags__$flag" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the linker accepts $flag" >&5 +$as_echo_n "checking whether the linker accepts $flag... " >&6; } +if eval \${$as_CACHEVAR+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$LDFLAGS + LDFLAGS="$LDFLAGS $flag" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + eval "$as_CACHEVAR=yes" +else + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes"; then : + +if ${LDFLAGS+:} false; then : + + case " $LDFLAGS " in #( + *" $flag "*) : + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS already contains \$flag"; } >&5 + (: LDFLAGS already contains $flag) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } ;; #( + *) : + + as_fn_append LDFLAGS " $flag" + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + ;; +esac + +else + + LDFLAGS=$flag + { { $as_echo "$as_me:${as_lineno-$LINENO}: : LDFLAGS=\"\$LDFLAGS\""; } >&5 + (: LDFLAGS="$LDFLAGS") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + +fi + +else + : +fi + +done #------------------------------------------------------------------------- @@ -4438,6 +4629,98 @@ fi + # Add subproject to our running list + + subprojects="$subprojects fesvr" + + # Process the subproject appropriately. 
If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. + + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : fesvr" >&5 +$as_echo "$as_me: configuring default subproject : fesvr" >&6;} + ac_config_files="$ac_config_files fesvr.mk:fesvr/fesvr.mk.in" + + enable_fesvr_sproj="yes" + subprojects_enabled="$subprojects_enabled fesvr" + +$as_echo "#define FESVR_ENABLED /**/" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPTHREAD 1 +_ACEOF + + LIBS="-lpthread $LIBS" + +else + as_fn_error $? 
"libpthread is required" "$LINENO" 5 +fi + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + # Add subproject to our running list subprojects="$subprojects riscv" @@ -4481,6 +4764,41 @@ _ACEOF fi + +# Check whether --with-priv was given. +if test "${with_priv+set}" = set; then : + withval=$with_priv; +cat >>confdefs.h <<_ACEOF +#define DEFAULT_PRIV "$withval" +_ACEOF + +else + +cat >>confdefs.h <<_ACEOF +#define DEFAULT_PRIV "MSU" +_ACEOF + +fi + + + +# Check whether --with-varch was given. +if test "${with_varch+set}" = set; then : + withval=$with_varch; +cat >>confdefs.h <<_ACEOF +#define DEFAULT_VARCH "$withval" +_ACEOF + +else + +cat >>confdefs.h <<_ACEOF +#define DEFAULT_VARCH "vlen:128,elen:64,slen:128" +_ACEOF + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing dlopen" >&5 $as_echo_n "checking for library containing dlopen... " >&6; } if ${ac_cv_search_dlopen+:} false; then : @@ -4535,69 +4853,12 @@ ac_res=$ac_cv_search_dlopen if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" -else - - as_fn_error $? "unable to find the dlopen() function" "$LINENO" 5 - -fi - - -# Check whether --with-fesvr was given. -if test "${with_fesvr+set}" = set; then : - withval=$with_fesvr; - LDFLAGS="-L$withval/lib $LDFLAGS" - CPPFLAGS="-I$withval/include $CPPFLAGS" - - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libfesvr_is_present in -lfesvr" >&5 -$as_echo_n "checking for libfesvr_is_present in -lfesvr... " >&6; } -if ${ac_cv_lib_fesvr_libfesvr_is_present+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lfesvr -pthread $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char libfesvr_is_present (); -int -main () -{ -return libfesvr_is_present (); - ; - return 0; -} -_ACEOF -if ac_fn_cxx_try_link "$LINENO"; then : - ac_cv_lib_fesvr_libfesvr_is_present=yes -else - ac_cv_lib_fesvr_libfesvr_is_present=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_fesvr_libfesvr_is_present" >&5 -$as_echo "$ac_cv_lib_fesvr_libfesvr_is_present" >&6; } -if test "x$ac_cv_lib_fesvr_libfesvr_is_present" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBFESVR 1 -_ACEOF +$as_echo "#define HAVE_DLOPEN /**/" >>confdefs.h +, + HAVE_DLOPEN=yes - LIBS="-lfesvr $LIBS" -else - as_fn_error $? "libfesvr is required" "$LINENO" 5 fi @@ -4728,7 +4989,7 @@ fi # Add subproject to our running list - subprojects="$subprojects dummy_rocc" + subprojects="$subprojects disasm" # Process the subproject appropriately. If enabled add it to the # $enabled_subprojects running shell variable, set a @@ -4736,14 +4997,104 @@ fi # 'subproject.ac'. 
- { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : dummy_rocc" >&5 -$as_echo "$as_me: configuring default subproject : dummy_rocc" >&6;} - ac_config_files="$ac_config_files dummy_rocc.mk:dummy_rocc/dummy_rocc.mk.in" + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : disasm" >&5 +$as_echo "$as_me: configuring default subproject : disasm" >&6;} + ac_config_files="$ac_config_files disasm.mk:disasm/disasm.mk.in" - enable_dummy_rocc_sproj="yes" - subprojects_enabled="$subprojects_enabled dummy_rocc" + enable_disasm_sproj="yes" + subprojects_enabled="$subprojects_enabled disasm" -$as_echo "#define DUMMY_ROCC_ENABLED /**/" >>confdefs.h +$as_echo "#define DISASM_ENABLED /**/" >>confdefs.h + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects customext" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. 
+ + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : customext" >&5 +$as_echo "$as_me: configuring default subproject : customext" >&6;} + ac_config_files="$ac_config_files customext.mk:customext/customext.mk.in" + + enable_customext_sproj="yes" + subprojects_enabled="$subprojects_enabled customext" + +$as_echo "#define CUSTOMEXT_ENABLED /**/" >>confdefs.h + + + + + + + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects fdt" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. + + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : fdt" >&5 +$as_echo "$as_me: configuring default subproject : fdt" >&6;} + ac_config_files="$ac_config_files fdt.mk:fdt/fdt.mk.in" + + enable_fdt_sproj="yes" + subprojects_enabled="$subprojects_enabled fdt" + +$as_echo "#define FDT_ENABLED /**/" >>confdefs.h @@ -4840,6 +5191,51 @@ $as_echo "#define SPIKE_MAIN_ENABLED /**/" >>confdefs.h + # Determine if this is a required or an optional subproject + + + + # Determine if there is a group with the same name + + + + # Create variations of the subproject name suitable for use as a CPP + # enabled define, a shell enabled variable, and a shell function + + + + + + + + + + + + # Add subproject to our running list + + subprojects="$subprojects spike_dasm" + + # Process the subproject appropriately. If enabled add it to the + # $enabled_subprojects running shell variable, set a + # SUBPROJECT_ENABLED C define, and include the appropriate + # 'subproject.ac'. 
+ + + { $as_echo "$as_me:${as_lineno-$LINENO}: configuring default subproject : spike_dasm" >&5 +$as_echo "$as_me: configuring default subproject : spike_dasm" >&6;} + ac_config_files="$ac_config_files spike_dasm.mk:spike_dasm/spike_dasm.mk.in" + + enable_spike_dasm_sproj="yes" + subprojects_enabled="$subprojects_enabled spike_dasm" + +$as_echo "#define SPIKE_DASM_ENABLED /**/" >>confdefs.h + + + + + + # Output make variables @@ -4866,15 +5262,9 @@ ac_config_headers="$ac_config_headers config.h" ac_config_files="$ac_config_files Makefile" -ac_config_files="$ac_config_files riscv-spike.pc" - -ac_config_files="$ac_config_files riscv-riscv.pc" - -ac_config_files="$ac_config_files riscv-softfloat.pc" - -ac_config_files="$ac_config_files riscv-dummy_rocc.pc" +ac_config_files="$ac_config_files riscv-fesvr.pc" -ac_config_files="$ac_config_files riscv-spike_main.pc" +ac_config_files="$ac_config_files riscv-disasm.pc" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -5567,17 +5957,18 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in + "fesvr.mk") CONFIG_FILES="$CONFIG_FILES fesvr.mk:fesvr/fesvr.mk.in" ;; "riscv.mk") CONFIG_FILES="$CONFIG_FILES riscv.mk:riscv/riscv.mk.in" ;; - "dummy_rocc.mk") CONFIG_FILES="$CONFIG_FILES dummy_rocc.mk:dummy_rocc/dummy_rocc.mk.in" ;; + "disasm.mk") CONFIG_FILES="$CONFIG_FILES disasm.mk:disasm/disasm.mk.in" ;; + "customext.mk") CONFIG_FILES="$CONFIG_FILES customext.mk:customext/customext.mk.in" ;; + "fdt.mk") CONFIG_FILES="$CONFIG_FILES fdt.mk:fdt/fdt.mk.in" ;; "softfloat.mk") CONFIG_FILES="$CONFIG_FILES softfloat.mk:softfloat/softfloat.mk.in" ;; "spike_main.mk") CONFIG_FILES="$CONFIG_FILES spike_main.mk:spike_main/spike_main.mk.in" ;; + "spike_dasm.mk") CONFIG_FILES="$CONFIG_FILES spike_dasm.mk:spike_dasm/spike_dasm.mk.in" ;; "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES 
Makefile" ;; - "riscv-spike.pc") CONFIG_FILES="$CONFIG_FILES riscv-spike.pc" ;; - "riscv-riscv.pc") CONFIG_FILES="$CONFIG_FILES riscv-riscv.pc" ;; - "riscv-softfloat.pc") CONFIG_FILES="$CONFIG_FILES riscv-softfloat.pc" ;; - "riscv-dummy_rocc.pc") CONFIG_FILES="$CONFIG_FILES riscv-dummy_rocc.pc" ;; - "riscv-spike_main.pc") CONFIG_FILES="$CONFIG_FILES riscv-spike_main.pc" ;; + "riscv-fesvr.pc") CONFIG_FILES="$CONFIG_FILES riscv-fesvr.pc" ;; + "riscv-disasm.pc") CONFIG_FILES="$CONFIG_FILES riscv-disasm.pc" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac diff --git a/configure.ac b/configure.ac index e361877da2..b7788b4ea9 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,11 @@ AC_CONFIG_AUX_DIR([scripts]) AC_CANONICAL_BUILD AC_CANONICAL_HOST +m4_include(ax_require_defined.m4) +m4_include(ax_append_flag.m4) +m4_include(ax_check_link_flag.m4) +m4_include(ax_append_link_flags.m4) + #------------------------------------------------------------------------- # Checks for programs #------------------------------------------------------------------------- @@ -53,9 +58,9 @@ AC_CHECK_TOOL([AR],[ar]) AC_CHECK_TOOL([RANLIB],[ranlib]) AC_PATH_PROG([DTC],[dtc],[no]) AS_IF([test x"$DTC" == xno],AC_MSG_ERROR([device-tree-compiler not found])) -AC_DEFINE_UNQUOTED(DTC, ["$DTC"], [Path to the device-tree-compiler]) +AC_DEFINE_UNQUOTED(DTC, ["dtc"], [Executable name of device-tree-compiler]) -AC_C_BIGENDIAN(AC_MSG_ERROR([Spike requires a little-endian host])) +AC_C_BIGENDIAN #------------------------------------------------------------------------- # MCPPBS specific program checks @@ -72,12 +77,17 @@ MCPPBS_PROG_INSTALL AC_HEADER_STDC +#------------------------------------------------------------------------- +# Checks for type +#------------------------------------------------------------------------- + +AC_CHECK_TYPE([__int128_t], AC_SUBST([HAVE_INT128],[yes])) + #------------------------------------------------------------------------- # 
Default compiler flags +#------------------------------------------------------------------------- -AC_SUBST([CFLAGS], ["-Wall -Wno-unused -g -O2"]) -AC_SUBST([CXXFLAGS],["-Wall -Wno-unused -g -O2 -std=c++11"]) +AX_APPEND_LINK_FLAGS([-Wl,--export-dynamic]) #------------------------------------------------------------------------- # MCPPBS subproject list @@ -86,7 +96,7 @@ AC_SUBST([CXXFLAGS],["-Wall -Wno-unused -g -O2 -std=c++11"]) # The '*' suffix indicates an optional subproject. The '**' suffix # indicates an optional subproject which is also the name of a group. -MCPPBS_SUBPROJECTS([ riscv, dummy_rocc, softfloat, spike_main ]) +MCPPBS_SUBPROJECTS([ fesvr, riscv, disasm, customext, fdt, softfloat, spike_main, spike_dasm ]) #------------------------------------------------------------------------- # MCPPBS subproject groups @@ -105,9 +115,6 @@ MCPPBS_SUBPROJECTS([ riscv, dummy_rocc, softfloat, spike_main ]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([Makefile]) -AC_CONFIG_FILES([riscv-spike.pc]) -AC_CONFIG_FILES([riscv-riscv.pc]) -AC_CONFIG_FILES([riscv-softfloat.pc]) -AC_CONFIG_FILES([riscv-dummy_rocc.pc]) -AC_CONFIG_FILES([riscv-spike_main.pc]) +AC_CONFIG_FILES([riscv-fesvr.pc]) +AC_CONFIG_FILES([riscv-disasm.pc]) AC_OUTPUT diff --git a/customext/cflush.cc b/customext/cflush.cc new file mode 100644 index 0000000000..dedcc03763 --- /dev/null +++ b/customext/cflush.cc @@ -0,0 +1,41 @@ +#include "extension.h" +#include + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs1()]; + } +} xrs1; + +static reg_t custom_cflush(processor_t* p, insn_t insn, reg_t pc) +{ + require_privilege(PRV_M); + + return pc + 4; // only the PRV_M privilege check is modelled; execution resumes at the following instruction +} + +class cflush_t : public extension_t +{ + public: + const char* name() { return "cflush"; } + + cflush_t() {} + + std::vector get_instructions() { + std::vector insns; + insns.push_back((insn_desc_t){0xFC000073, 0xFFF07FFF, custom_cflush, custom_cflush}); + insns.push_back((insn_desc_t){0xFC200073, 0xFFF07FFF, 
custom_cflush, custom_cflush}); + insns.push_back((insn_desc_t){0xFC100073, 0xFFF07FFF, custom_cflush, custom_cflush}); + return insns; + } + + std::vector get_disasms() { + std::vector insns; + insns.push_back(new disasm_insn_t("cflush.d.l1", 0xFC000073, 0xFFF07FFF, {&xrs1})); + insns.push_back(new disasm_insn_t("cdiscard.d.l1", 0xFC200073, 0xFFF07FFF, {&xrs1})); + insns.push_back(new disasm_insn_t("cflush.i.l1", 0xFC100073, 0xFFF07FFF, {&xrs1})); + return insns; + } +}; + +REGISTER_EXTENSION(cflush, []() { return new cflush_t; }) diff --git a/dummy_rocc/dummy_rocc.ac b/customext/customext.ac similarity index 100% rename from dummy_rocc/dummy_rocc.ac rename to customext/customext.ac diff --git a/customext/customext.mk.in b/customext/customext.mk.in new file mode 100644 index 0000000000..0dd725ef2c --- /dev/null +++ b/customext/customext.mk.in @@ -0,0 +1,12 @@ +customext_subproject_deps = \ + spike_main \ + riscv \ + softfloat \ + +customext_srcs = \ + dummy_rocc.cc \ + cflush.cc \ + +customext_CFLAGS = -fPIC + +customext_install_shared_lib = yes diff --git a/dummy_rocc/dummy_rocc.cc b/customext/dummy_rocc.cc similarity index 100% rename from dummy_rocc/dummy_rocc.cc rename to customext/dummy_rocc.cc diff --git a/dummy_rocc/dummy_rocc_test.c b/customext/dummy_rocc_test.c similarity index 100% rename from dummy_rocc/dummy_rocc_test.c rename to customext/dummy_rocc_test.c diff --git a/debug_rom/debug_rom.S b/debug_rom/debug_rom.S index 28c7076fda..8d8e4cd037 100755 --- a/debug_rom/debug_rom.S +++ b/debug_rom/debug_rom.S @@ -14,6 +14,7 @@ entry: jal zero, _entry resume: + // Not used. jal zero, _resume exception: jal zero, _exception @@ -37,16 +38,22 @@ entry_loop: csrr s0, CSR_MHARTID lbu s0, DEBUG_ROM_FLAGS(s0) // multiple harts can resume here andi s0, s0, (1 << DEBUG_ROM_FLAG_RESUME) - bnez s0, resume + bnez s0, _resume + wfi jal zero, entry_loop _exception: + // Restore S0, which we always save to dscratch. 
+ // We need this in case the user tried an abstract write to a + // non-existent CSR. + csrr s0, CSR_DSCRATCH sw zero, DEBUG_ROM_EXCEPTION(zero) // Let debug module know you got an exception. ebreak going: + csrr s0, CSR_MHARTID + sw s0, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset. csrr s0, CSR_DSCRATCH // Restore s0 here - sw zero, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset. fence fence.i jalr zero, zero, %lo(whereto) // Debug module will put different instructions and data in the RAM, diff --git a/debug_rom/debug_rom.h b/debug_rom/debug_rom.h index d21e1669c7..7edd5f68f9 100644 --- a/debug_rom/debug_rom.h +++ b/debug_rom/debug_rom.h @@ -1,12 +1,13 @@ static const unsigned char debug_rom_raw[] = { - 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x40, 0x05, 0x6f, 0x00, 0x40, 0x03, + 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x00, 0x06, 0x6f, 0x00, 0x80, 0x03, 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x13, 0x74, 0x14, 0x00, - 0x63, 0x10, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, - 0x13, 0x74, 0x24, 0x00, 0xe3, 0x18, 0x04, 0xfc, 0x6f, 0xf0, 0xdf, 0xfd, - 0x23, 0x26, 0x00, 0x10, 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x20, 0x7b, - 0x23, 0x22, 0x00, 0x10, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, + 0x63, 0x14, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, + 0x13, 0x74, 0x24, 0x00, 0x63, 0x18, 0x04, 0x02, 0x73, 0x00, 0x50, 0x10, + 0x6f, 0xf0, 0x9f, 0xfd, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10, + 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x22, 0x80, 0x10, + 0x73, 0x24, 0x20, 0x7b, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, 0x67, 0x00, 0x00, 0x30, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b }; -static const unsigned int debug_rom_raw_len = 104; +static const unsigned int debug_rom_raw_len = 116; diff --git a/disasm/disasm.ac 
b/disasm/disasm.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/disasm/disasm.cc b/disasm/disasm.cc new file mode 100644 index 0000000000..0b95893299 --- /dev/null +++ b/disasm/disasm.cc @@ -0,0 +1,1744 @@ +// See LICENSE for license details. + +#include "disasm.h" +#include +#include +#include +#include +#include +#include + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string("(") + xpr_name[insn.rs1()] + ')'; + } +} amo_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rd()]; + } +} xrd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs1()]; + } +} xrs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rs2()]; + } +} xrs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rd()]; + } +} frd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs1()]; + } +} frs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs2()]; + } +} frs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rs3()]; + } +} frs3; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + switch (insn.csr()) + { + #define DECLARE_CSR(name, num) case num: return #name; + #include "encoding.h" + #undef DECLARE_CSR + default: + { + char buf[16]; + snprintf(buf, sizeof buf, "unknown_%03" PRIx64, insn.csr()); + return std::string(buf); + } + } + } +} csr; + +struct : 
public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()); + } +} imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.shamt()); + } +} shamt; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + s << std::hex << "0x" << ((uint32_t)insn.u_imm() >> 12); + return s.str(); + } +} bigimm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string(insn.rs1()); + } +} zimm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.sb_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} branch_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.uj_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << std::hex << " 0x" << abs(target); + return s.str(); + } +} jump_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs1()]; + } +} rvc_rs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs2()]; + } +} rvc_rs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rvc_rs2()]; + } +} rvc_fp_rs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs1s()]; + } +} rvc_rs1s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[insn.rvc_rs2s()]; + } +} rvc_rs2s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return fpr_name[insn.rvc_rs2s()]; + } +} rvc_fp_rs2s; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return xpr_name[X_SP]; + } +} rvc_sp; + +struct : public arg_t { + std::string to_string(insn_t insn) 
const { + return std::to_string((int)insn.rvc_imm()); + } +} rvc_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_addi4spn_imm()); + } +} rvc_addi4spn_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_addi16sp_imm()); + } +} rvc_addi16sp_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lwsp_imm()); + } +} rvc_lwsp_imm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)(insn.rvc_imm() & 0x3f)); + } +} rvc_shamt; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + s << std::hex << "0x" << ((uint32_t)insn.rvc_imm() << 12 >> 12); + return s.str(); + } +} rvc_uimm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lwsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_lwsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_ldsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_ldsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_swsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_swsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_sdsp_imm()) + '(' + xpr_name[X_SP] + ')'; + } +} rvc_sdsp_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_lw_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; + } +} rvc_lw_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.rvc_ld_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; + } +} rvc_ld_address; + +struct : public arg_t { + std::string to_string(insn_t insn) 
const { + std::stringstream s; + int32_t target = insn.rvc_b_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} rvc_branch_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int32_t target = insn.rvc_j_imm(); + char sign = target >= 0 ? '+' : '-'; + s << "pc " << sign << ' ' << abs(target); + return s.str(); + } +} rvc_jump_target; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string("(") + xpr_name[insn.rs1()] + ')'; + } +} v_address; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rd()]; + } +} vd; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rs1()]; + } +} vs1; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rs2()]; + } +} vs2; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return vr_name[insn.rd()]; + } +} vs3; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return insn.v_vm() ? "" : "v0.t"; + } +} vm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return "v0"; + } +} v0; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.v_simm5()); + } +} v_simm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + std::stringstream s; + int sew = insn.v_sew(); + int lmul = insn.v_lmul(); + auto vta = insn.v_vta() == 1 ? "ta" : "tu"; + auto vma = insn.v_vma() == 1 ? 
"ma" : "mu"; + s << "e" << sew; + if(insn.v_frac_lmul()) { + std::string lmul_str = ""; + switch(lmul){ + case 3: + lmul_str = "f2"; + break; + case 2: + lmul_str = "f4"; + break; + case 1: + lmul_str = "f8"; + break; + default: + assert(true && "unsupport fractional LMUL"); + } + s << ", m" << lmul_str; + } else { + s << ", m" << (1 << lmul); + } + s << ", " << vta << ", " << vma; + return s.str(); + } +} v_vtype; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return "x0"; + } +} x0; + +// Xpulpimg + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_uimmL()); + } +} p_uimmL; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_uimmS()); + } +} p_uimmS; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_loop()); + } +} p_loop; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_Luimm5()); + } +} p_Luimm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm5()); + } +} p_zimm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm5()); + } +} p_simm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((uint32_t)insn.p_zimm6()); + } +} p_zimm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.p_simm6()); + } +} p_simm6; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} load_address_rrpost; + 
+struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.rs2()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} load_address_rr; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_irpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + "!)"; + } +} store_address_rrpost; + +struct : public arg_t { + std::string to_string(insn_t insn) const { + return std::string(xpr_name[insn.p_rs3()]) + '(' + xpr_name[insn.rs1()] + ')'; + } +} store_address_rr; + + +typedef struct { + reg_t match; + reg_t mask; + const char *fmt; + std::vector& arg; +} custom_fmt_t; + +std::string disassembler_t::disassemble(insn_t insn) const +{ + const disasm_insn_t* disasm_insn = lookup(insn); + return disasm_insn ? disasm_insn->to_string(insn) : "unknown"; +} + +disassembler_t::disassembler_t(int xlen) +{ + const uint32_t mask_rd = 0x1fUL << 7; + const uint32_t match_rd_ra = 1UL << 7; + const uint32_t mask_rs1 = 0x1fUL << 15; + const uint32_t match_rs1_ra = 1UL << 15; + const uint32_t mask_rs2 = 0x1fUL << 20; + const uint32_t mask_imm = 0xfffUL << 20; + const uint32_t match_imm_1 = 1UL << 20; + const uint32_t mask_rvc_rs2 = 0x1fUL << 2; + const uint32_t mask_rvc_imm = mask_rvc_rs2 | 0x1000UL; + const uint32_t mask_nf = 0x7Ul << 29; + const uint32_t mask_wd = 0x1Ul << 26; + const uint32_t mask_vm = 0x1Ul << 25; + const uint32_t mask_vldst = 0x7Ul << 12 | 0x1UL << 28; + const uint32_t mask_amoop = 0x1fUl << 27; + const uint32_t mask_width = 0x7Ul << 12; + + #define DECLARE_INSN(code, match, mask) \ + const uint32_t match_##code = match; \ + const uint32_t mask_##code = mask; + #include "encoding.h" + #undef DECLARE_INSN + + // explicit per-instruction disassembly + #define DISASM_INSN(name, code, extra, ...) 
\ + add_insn(new disasm_insn_t(name, match_##code, mask_##code | (extra), __VA_ARGS__)); + #define DEFINE_NOARG(code) \ + add_insn(new disasm_insn_t(#code, match_##code, mask_##code, {})); + #define DEFINE_RTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2}) + #define DEFINE_R1TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1}) + #define DEFINE_ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &imm}) + #define DEFINE_ITYPE_SHIFT(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &shamt}) + #define DEFINE_I0TYPE(name, code) DISASM_INSN(name, code, mask_rs1, {&xrd, &imm}) + #define DEFINE_I1TYPE(name, code) DISASM_INSN(name, code, mask_imm, {&xrd, &xrs1}) + #define DEFINE_I2TYPE(name, code) DISASM_INSN(name, code, mask_rd | mask_imm, {&xrs1}) + #define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm}) + #define DEFINE_BTYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2, &branch_target}) + #define DEFINE_B0TYPE(name, code) DISASM_INSN(name, code, mask_rs1 | mask_rs2, {&branch_target}) + #define DEFINE_B1TYPE(name, code) DISASM_INSN(name, code, mask_rs2, {&xrs1, &branch_target}) + #define DEFINE_XLOAD(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address}) + #define DEFINE_XSTORE(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address}) + #define DEFINE_XAMO(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs2, &amo_address}) + #define DEFINE_XAMO_LR(code) DISASM_INSN(#code, code, 0, {&xrd, &amo_address}) + #define DEFINE_FLOAD(code) DISASM_INSN(#code, code, 0, {&frd, &load_address}) + #define DEFINE_FSTORE(code) DISASM_INSN(#code, code, 0, {&frs2, &store_address}) + #define DEFINE_FRTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2}) + #define DEFINE_FR1TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1}) + #define DEFINE_FR3TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2, &frs3}) + #define DEFINE_FXTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1}) + #define DEFINE_FX2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1, 
&frs2}) + #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &xrs1}) + #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) + // Xpulpimg + #define DEFINE_PLOAD_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_irpost}) + #define DEFINE_PLOAD_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rrpost}) + #define DEFINE_PLOAD_RR(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address_rr}) + #define DEFINE_PSTORE_IRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_irpost}) + #define DEFINE_PSTORE_RRPOST(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rrpost}) + #define DEFINE_PSTORE_RR(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address_rr}) + #define DEFINE_PI0TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm5}) + #define DEFINE_PI1ZTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_zimm6}) + #define DEFINE_PI1STYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm6}) + #define DEFINE_PLUI2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_Luimm5, &p_zimm5}) + #define DEFINE_PBTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &p_simm5, &branch_target}) + #define DEFINE_PR2LUITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2, &p_Luimm5}) + + DEFINE_XLOAD(lb) + DEFINE_XLOAD(lbu) + DEFINE_XLOAD(lh) + DEFINE_XLOAD(lhu) + DEFINE_XLOAD(lw) + DEFINE_XLOAD(lwu) + DEFINE_XLOAD(ld) + + DEFINE_XSTORE(sb) + DEFINE_XSTORE(sh) + DEFINE_XSTORE(sw) + DEFINE_XSTORE(sd) + + DEFINE_XAMO(amoadd_w) + DEFINE_XAMO(amoswap_w) + DEFINE_XAMO(amoand_w) + DEFINE_XAMO(amoor_w) + DEFINE_XAMO(amoxor_w) + DEFINE_XAMO(amomin_w) + DEFINE_XAMO(amomax_w) + DEFINE_XAMO(amominu_w) + DEFINE_XAMO(amomaxu_w) + DEFINE_XAMO(amoadd_d) + DEFINE_XAMO(amoswap_d) + DEFINE_XAMO(amoand_d) + DEFINE_XAMO(amoor_d) + DEFINE_XAMO(amoxor_d) + DEFINE_XAMO(amomin_d) + DEFINE_XAMO(amomax_d) + DEFINE_XAMO(amominu_d) + DEFINE_XAMO(amomaxu_d) + + DEFINE_XAMO_LR(lr_w) + DEFINE_XAMO(sc_w) + 
DEFINE_XAMO_LR(lr_d) + DEFINE_XAMO(sc_d) + + DEFINE_FLOAD(flw) + DEFINE_FLOAD(fld) + DEFINE_FLOAD(flh) + DEFINE_FLOAD(flq) + + DEFINE_FSTORE(fsw) + DEFINE_FSTORE(fsd) + DEFINE_FSTORE(fsh) + DEFINE_FSTORE(fsq) + + add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); + add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); + add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); + + DEFINE_B1TYPE("beqz", beq); + DEFINE_B1TYPE("bnez", bne); + DEFINE_B1TYPE("bltz", blt); + DEFINE_B1TYPE("bgez", bge); + DEFINE_BTYPE(beq) + DEFINE_BTYPE(bne) + DEFINE_BTYPE(blt) + DEFINE_BTYPE(bge) + DEFINE_BTYPE(bltu) + DEFINE_BTYPE(bgeu) + + DEFINE_LTYPE(lui); + DEFINE_LTYPE(auipc); + + add_insn(new disasm_insn_t("ret", match_jalr | match_rs1_ra, mask_jalr | mask_rd | mask_rs1 | mask_imm, {})); + DEFINE_I2TYPE("jr", jalr); + add_insn(new disasm_insn_t("jalr", match_jalr | match_rd_ra, mask_jalr | mask_rd | mask_imm, {&xrs1})); + DEFINE_ITYPE(jalr); + + add_insn(new disasm_insn_t("nop", match_addi, mask_addi | mask_rd | mask_rs1 | mask_imm, {})); + DEFINE_I0TYPE("li", addi); + DEFINE_I1TYPE("mv", addi); + DEFINE_ITYPE(addi); + DEFINE_ITYPE(slti); + add_insn(new disasm_insn_t("seqz", match_sltiu | match_imm_1, mask_sltiu | mask_imm, {&xrd, &xrs1})); + DEFINE_ITYPE(sltiu); + add_insn(new disasm_insn_t("not", match_xori | mask_imm, mask_xori | mask_imm, {&xrd, &xrs1})); + DEFINE_ITYPE(xori); + + DEFINE_ITYPE_SHIFT(slli); + DEFINE_ITYPE_SHIFT(srli); + DEFINE_ITYPE_SHIFT(srai); + + DEFINE_ITYPE(ori); + DEFINE_ITYPE(andi); + DEFINE_I1TYPE("sext.w", addiw); + DEFINE_ITYPE(addiw); + + DEFINE_ITYPE_SHIFT(slliw); + DEFINE_ITYPE_SHIFT(srliw); + DEFINE_ITYPE_SHIFT(sraiw); + + DEFINE_RTYPE(add); + DEFINE_RTYPE(sub); + DEFINE_RTYPE(sll); + DEFINE_RTYPE(slt); + add_insn(new disasm_insn_t("snez", match_sltu, mask_sltu | mask_rs1, {&xrd, &xrs2})); + DEFINE_RTYPE(sltu); + DEFINE_RTYPE(xor); + DEFINE_RTYPE(srl); + 
DEFINE_RTYPE(sra); + DEFINE_RTYPE(or); + DEFINE_RTYPE(and); + DEFINE_RTYPE(mul); + DEFINE_RTYPE(mulh); + DEFINE_RTYPE(mulhu); + DEFINE_RTYPE(mulhsu); + DEFINE_RTYPE(div); + DEFINE_RTYPE(divu); + DEFINE_RTYPE(rem); + DEFINE_RTYPE(remu); + DEFINE_RTYPE(addw); + DEFINE_RTYPE(subw); + DEFINE_RTYPE(sllw); + DEFINE_RTYPE(srlw); + DEFINE_RTYPE(sraw); + DEFINE_RTYPE(mulw); + DEFINE_RTYPE(divw); + DEFINE_RTYPE(divuw); + DEFINE_RTYPE(remw); + DEFINE_RTYPE(remuw); + + DEFINE_NOARG(ecall); + DEFINE_NOARG(ebreak); + DEFINE_NOARG(uret); + DEFINE_NOARG(sret); + DEFINE_NOARG(mret); + DEFINE_NOARG(dret); + DEFINE_NOARG(wfi); + DEFINE_NOARG(fence); + DEFINE_NOARG(fence_i); + DEFINE_SFENCE_TYPE(sfence_vma); + + add_insn(new disasm_insn_t("csrr", match_csrrs, mask_csrrs | mask_rs1, {&xrd, &csr})); + add_insn(new disasm_insn_t("csrw", match_csrrw, mask_csrrw | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrs", match_csrrs, mask_csrrs | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrc", match_csrrc, mask_csrrc | mask_rd, {&csr, &xrs1})); + add_insn(new disasm_insn_t("csrwi", match_csrrwi, mask_csrrwi | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrsi", match_csrrsi, mask_csrrsi | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrci", match_csrrci, mask_csrrci | mask_rd, {&csr, &zimm5})); + add_insn(new disasm_insn_t("csrrw", match_csrrw, mask_csrrw, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrs", match_csrrs, mask_csrrs, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrc", match_csrrc, mask_csrrc, {&xrd, &csr, &xrs1})); + add_insn(new disasm_insn_t("csrrwi", match_csrrwi, mask_csrrwi, {&xrd, &csr, &zimm5})); + add_insn(new disasm_insn_t("csrrsi", match_csrrsi, mask_csrrsi, {&xrd, &csr, &zimm5})); + add_insn(new disasm_insn_t("csrrci", match_csrrci, mask_csrrci, {&xrd, &csr, &zimm5})); + + DEFINE_FRTYPE(fadd_s); + DEFINE_FRTYPE(fsub_s); + DEFINE_FRTYPE(fmul_s); + DEFINE_FRTYPE(fdiv_s); + DEFINE_FR1TYPE(fsqrt_s); + 
DEFINE_FRTYPE(fmin_s); + DEFINE_FRTYPE(fmax_s); + DEFINE_FR3TYPE(fmadd_s); + DEFINE_FR3TYPE(fmsub_s); + DEFINE_FR3TYPE(fnmadd_s); + DEFINE_FR3TYPE(fnmsub_s); + DEFINE_FRTYPE(fsgnj_s); + DEFINE_FRTYPE(fsgnjn_s); + DEFINE_FRTYPE(fsgnjx_s); + DEFINE_FR1TYPE(fcvt_s_d); + DEFINE_FR1TYPE(fcvt_s_q); + DEFINE_XFTYPE(fcvt_s_l); + DEFINE_XFTYPE(fcvt_s_lu); + DEFINE_XFTYPE(fcvt_s_w); + DEFINE_XFTYPE(fcvt_s_wu); + DEFINE_XFTYPE(fcvt_s_wu); + DEFINE_XFTYPE(fmv_w_x); + DEFINE_FXTYPE(fcvt_l_s); + DEFINE_FXTYPE(fcvt_lu_s); + DEFINE_FXTYPE(fcvt_w_s); + DEFINE_FXTYPE(fcvt_wu_s); + DEFINE_FXTYPE(fclass_s); + DEFINE_FXTYPE(fmv_x_w); + DEFINE_FX2TYPE(feq_s); + DEFINE_FX2TYPE(flt_s); + DEFINE_FX2TYPE(fle_s); + + DEFINE_FRTYPE(fadd_d); + DEFINE_FRTYPE(fsub_d); + DEFINE_FRTYPE(fmul_d); + DEFINE_FRTYPE(fdiv_d); + DEFINE_FR1TYPE(fsqrt_d); + DEFINE_FRTYPE(fmin_d); + DEFINE_FRTYPE(fmax_d); + DEFINE_FR3TYPE(fmadd_d); + DEFINE_FR3TYPE(fmsub_d); + DEFINE_FR3TYPE(fnmadd_d); + DEFINE_FR3TYPE(fnmsub_d); + DEFINE_FRTYPE(fsgnj_d); + DEFINE_FRTYPE(fsgnjn_d); + DEFINE_FRTYPE(fsgnjx_d); + DEFINE_FR1TYPE(fcvt_d_s); + DEFINE_FR1TYPE(fcvt_d_q); + DEFINE_XFTYPE(fcvt_d_l); + DEFINE_XFTYPE(fcvt_d_lu); + DEFINE_XFTYPE(fcvt_d_w); + DEFINE_XFTYPE(fcvt_d_wu); + DEFINE_XFTYPE(fcvt_d_wu); + DEFINE_XFTYPE(fmv_d_x); + DEFINE_FXTYPE(fcvt_l_d); + DEFINE_FXTYPE(fcvt_lu_d); + DEFINE_FXTYPE(fcvt_w_d); + DEFINE_FXTYPE(fcvt_wu_d); + DEFINE_FXTYPE(fclass_d); + DEFINE_FXTYPE(fmv_x_d); + DEFINE_FX2TYPE(feq_d); + DEFINE_FX2TYPE(flt_d); + DEFINE_FX2TYPE(fle_d); + + DEFINE_FRTYPE(fadd_h); + DEFINE_FRTYPE(fsub_h); + DEFINE_FRTYPE(fmul_h); + DEFINE_FRTYPE(fdiv_h); + DEFINE_FR1TYPE(fsqrt_h); + DEFINE_FRTYPE(fmin_h); + DEFINE_FRTYPE(fmax_h); + DEFINE_FR3TYPE(fmadd_h); + DEFINE_FR3TYPE(fmsub_h); + DEFINE_FR3TYPE(fnmadd_h); + DEFINE_FR3TYPE(fnmsub_h); + DEFINE_FRTYPE(fsgnj_h); + DEFINE_FRTYPE(fsgnjn_h); + DEFINE_FRTYPE(fsgnjx_h); + DEFINE_FR1TYPE(fcvt_h_s); + DEFINE_FR1TYPE(fcvt_h_d); + DEFINE_FR1TYPE(fcvt_h_q); + 
DEFINE_FR1TYPE(fcvt_s_h); + DEFINE_FR1TYPE(fcvt_d_h); + DEFINE_FR1TYPE(fcvt_q_h); + DEFINE_XFTYPE(fcvt_h_l); + DEFINE_XFTYPE(fcvt_h_lu); + DEFINE_XFTYPE(fcvt_h_w); + DEFINE_XFTYPE(fcvt_h_wu); + DEFINE_XFTYPE(fcvt_h_wu); + DEFINE_XFTYPE(fmv_h_x); + DEFINE_FXTYPE(fcvt_l_h); + DEFINE_FXTYPE(fcvt_lu_h); + DEFINE_FXTYPE(fcvt_w_h); + DEFINE_FXTYPE(fcvt_wu_h); + DEFINE_FXTYPE(fclass_h); + DEFINE_FXTYPE(fmv_x_h); + DEFINE_FX2TYPE(feq_h); + DEFINE_FX2TYPE(flt_h); + DEFINE_FX2TYPE(fle_h); + + DEFINE_FRTYPE(fadd_q); + DEFINE_FRTYPE(fsub_q); + DEFINE_FRTYPE(fmul_q); + DEFINE_FRTYPE(fdiv_q); + DEFINE_FR1TYPE(fsqrt_q); + DEFINE_FRTYPE(fmin_q); + DEFINE_FRTYPE(fmax_q); + DEFINE_FR3TYPE(fmadd_q); + DEFINE_FR3TYPE(fmsub_q); + DEFINE_FR3TYPE(fnmadd_q); + DEFINE_FR3TYPE(fnmsub_q); + DEFINE_FRTYPE(fsgnj_q); + DEFINE_FRTYPE(fsgnjn_q); + DEFINE_FRTYPE(fsgnjx_q); + DEFINE_FR1TYPE(fcvt_q_s); + DEFINE_FR1TYPE(fcvt_q_d); + DEFINE_XFTYPE(fcvt_q_l); + DEFINE_XFTYPE(fcvt_q_lu); + DEFINE_XFTYPE(fcvt_q_w); + DEFINE_XFTYPE(fcvt_q_wu); + DEFINE_XFTYPE(fcvt_q_wu); + //DEFINE_XFTYPE(fmv_q_x); + DEFINE_FXTYPE(fcvt_l_q); + DEFINE_FXTYPE(fcvt_lu_q); + DEFINE_FXTYPE(fcvt_w_q); + DEFINE_FXTYPE(fcvt_wu_q); + DEFINE_FXTYPE(fclass_q); + //DEFINE_FXTYPE(fmv_x_q); + DEFINE_FX2TYPE(feq_q); + DEFINE_FX2TYPE(flt_q); + DEFINE_FX2TYPE(fle_q); + + DISASM_INSN("c.ebreak", c_add, mask_rd | mask_rvc_rs2, {}); + add_insn(new disasm_insn_t("ret", match_c_jr | match_rd_ra, mask_c_jr | mask_rd | mask_rvc_imm, {})); + DISASM_INSN("c.jr", c_jr, mask_rvc_imm, {&rvc_rs1}); + DISASM_INSN("c.jalr", c_jalr, mask_rvc_imm, {&rvc_rs1}); + DISASM_INSN("c.nop", c_addi, mask_rd | mask_rvc_imm, {}); + DISASM_INSN("c.addi16sp", c_addi16sp, mask_rd, {&rvc_sp, &rvc_addi16sp_imm}); + DISASM_INSN("c.addi4spn", c_addi4spn, 0, {&rvc_rs2s, &rvc_sp, &rvc_addi4spn_imm}); + DISASM_INSN("c.li", c_li, 0, {&xrd, &rvc_imm}); + DISASM_INSN("c.lui", c_lui, 0, {&xrd, &rvc_uimm}); + DISASM_INSN("c.addi", c_addi, 0, {&xrd, &rvc_imm}); + 
DISASM_INSN("c.slli", c_slli, 0, {&rvc_rs1, &rvc_shamt}); + DISASM_INSN("c.srli", c_srli, 0, {&rvc_rs1s, &rvc_shamt}); + DISASM_INSN("c.srai", c_srai, 0, {&rvc_rs1s, &rvc_shamt}); + DISASM_INSN("c.andi", c_andi, 0, {&rvc_rs1s, &rvc_imm}); + DISASM_INSN("c.mv", c_mv, 0, {&xrd, &rvc_rs2}); + DISASM_INSN("c.add", c_add, 0, {&xrd, &rvc_rs2}); + DISASM_INSN("c.addw", c_addw, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.sub", c_sub, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.subw", c_subw, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.and", c_and, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.or", c_or, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.xor", c_xor, 0, {&rvc_rs1s, &rvc_rs2s}); + DISASM_INSN("c.lwsp", c_lwsp, 0, {&xrd, &rvc_lwsp_address}); + DISASM_INSN("c.fld", c_fld, 0, {&rvc_fp_rs2s, &rvc_ld_address}); + DISASM_INSN("c.swsp", c_swsp, 0, {&rvc_rs2, &rvc_swsp_address}); + DISASM_INSN("c.lw", c_lw, 0, {&rvc_rs2s, &rvc_lw_address}); + DISASM_INSN("c.sw", c_sw, 0, {&rvc_rs2s, &rvc_lw_address}); + DISASM_INSN("c.beqz", c_beqz, 0, {&rvc_rs1s, &rvc_branch_target}); + DISASM_INSN("c.bnez", c_bnez, 0, {&rvc_rs1s, &rvc_branch_target}); + DISASM_INSN("c.j", c_j, 0, {&rvc_jump_target}); + DISASM_INSN("c.fldsp", c_fldsp, 0, {&frd, &rvc_ldsp_address}); + DISASM_INSN("c.fsd", c_fsd, 0, {&rvc_fp_rs2s, &rvc_ld_address}); + DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2, &rvc_sdsp_address}); + +#ifdef VECTOR_EXT + + DISASM_INSN("vsetvli", vsetvli, 0, {&xrd, &xrs1, &v_vtype}); + //DISASM_INSN("vsetvl", vsetvl, 0, {&xrd, &xrs1, &xrs2}); + + #define DISASM_VMEM_INSN(name, fmt, ff) \ + add_insn(new disasm_insn_t(#name "8" #ff ".v", match_##name##8##ff##_v, mask_##name##8##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "16" #ff ".v", match_##name##16##ff##_v, mask_##name##16##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "32" #ff ".v", match_##name##32##ff##_v, mask_##name##32##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "64" #ff 
".v", match_##name##64##ff##_v, mask_##name##64##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "128" #ff ".v", match_##name##128##ff##_v, mask_##name##128##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "256" #ff ".v", match_##name##256##ff##_v, mask_##name##256##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "512" #ff ".v", match_##name##512##ff##_v, mask_##name##512##ff##_v | mask_nf, fmt)); \ + add_insn(new disasm_insn_t(#name "1024" #ff ".v", match_##name##1024##ff##_v, mask_##name##1024##ff##_v | mask_nf, fmt)); \ + + std::vector v_ld_unit = {&vd, &v_address, &opt, &vm}; + std::vector v_st_unit = {&vs3, &v_address, &opt, &vm}; + std::vector v_ld_stride = {&vd, &v_address, &xrs2, &opt, &vm}; + std::vector v_st_stride = {&vs3, &v_address, &xrs2, &opt, &vm}; + std::vector v_ld_index = {&vd, &v_address, &vs2, &opt, &vm}; + std::vector v_st_index = {&vs3, &v_address, &vs2, &opt, &vm}; + + DISASM_VMEM_INSN(vle, v_ld_unit, ); + DISASM_VMEM_INSN(vlse, v_ld_stride, ); + DISASM_VMEM_INSN(vlxei, v_ld_index, ); + DISASM_VMEM_INSN(vle, v_ld_unit, ff); + DISASM_VMEM_INSN(vse, v_st_unit, ); + DISASM_VMEM_INSN(vsse, v_st_stride, ); + DISASM_VMEM_INSN(vsxei, v_st_index, ); + DISASM_VMEM_INSN(vsuxei, v_st_index, ); + + #undef DISASM_VMEM_INSN + + // handle vector segment load/store + for (size_t elt = 0; elt <= 7; ++elt) { + const custom_fmt_t template_insn[] = { + {match_vle8_v, mask_vle8_v, "vlseg%de%d.v", v_ld_unit}, + {match_vse8_v, mask_vse8_v, "vsseg%de%d.v", v_st_unit}, + + {match_vlse8_v, mask_vlse8_v, "vlsseg%de%d.v", v_ld_stride}, + {match_vsse8_v, mask_vsse8_v, "vssseg%de%d.v", v_st_stride}, + + {match_vlxei8_v, mask_vlxei8_v, "vlxseg%dei%d.v", v_ld_index}, + {match_vsxei8_v, mask_vsxei8_v, "vsxseg%dei%d.v", v_st_index}, + + {match_vle8ff_v, mask_vle8ff_v, "vlseg%de%dff.v", v_ld_unit} + }; + + reg_t elt_map[] = {0x00000000, 0x00005000, 0x00006000, 0x00007000, + 0x10000000, 0x10005000, 0x10006000, 0x10007000}; + + for 
(size_t nf = 1; nf <= 7; ++nf) { + for (auto item : template_insn) { + const reg_t match_nf = nf << 29; + char buf[128]; + sprintf(buf, item.fmt, nf + 1, 8 << elt); + add_insn(new disasm_insn_t( + buf, + ((item.match | match_nf) & ~mask_vldst) | elt_map[elt], + item.mask | mask_nf, + item.arg + )); + } + } + + //handle whole register load + if (elt >= 4) + continue; + + const custom_fmt_t template_insn2[] = { + {match_vl1re8_v, mask_vl1re8_v, "vl%dre%d.v", v_ld_unit}, + }; + + for (reg_t i = 0, nf = 7; i < 4; i++, nf >>= 1) { + for (auto item : template_insn2) { + const reg_t match_nf = nf << 29; + char buf[128]; + sprintf(buf, item.fmt, nf + 1, 8 << elt); + add_insn(new disasm_insn_t( + buf, + item.match | match_nf | elt_map[elt], + item.mask | mask_nf, + item.arg + )); + } + } + } + + #define DISASM_ST_WHOLE_INSN(name, nf) \ + add_insn(new disasm_insn_t(#name, match_vs1r_v | (nf << 29), \ + mask_vs1r_v | mask_nf, \ + {&vs3, &v_address})); + DISASM_ST_WHOLE_INSN(vs1r.v, 0); + DISASM_ST_WHOLE_INSN(vs2r.v, 1); + DISASM_ST_WHOLE_INSN(vs4r.v, 3); + DISASM_ST_WHOLE_INSN(vs8r.v, 7); + + #undef DISASM_ST_WHOLE_INSN + + #define DISASM_OPIV_VXI_INSN(name, sign, suf) \ + add_insn(new disasm_insn_t(#name "." #suf "v", \ + match_##name##_##suf##v, mask_##name##_##suf##v, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "." #suf "x", \ + match_##name##_##suf##x, mask_##name##_##suf##x, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + if (sign) \ + add_insn(new disasm_insn_t(#name "." #suf "i", \ + match_##name##_##suf##i, mask_##name##_##suf##i, \ + {&vd, &vs2, &v_simm5, &opt, &vm})); \ + else \ + add_insn(new disasm_insn_t(#name "." 
#suf "i", \ + match_##name##_##suf##i, mask_##name##_##suf##i, \ + {&vd, &vs2, &zimm5, &opt, &vm})); + + #define DISASM_OPIV_VX__INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + + #define DISASM_OPIV__XI_INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); \ + if (sign) \ + add_insn(new disasm_insn_t(#name ".vi", match_##name##_vi, mask_##name##_vi, \ + {&vd, &vs2, &v_simm5, &opt, &vm})); \ + else \ + add_insn(new disasm_insn_t(#name ".vi", match_##name##_vi, mask_##name##_vi, \ + {&vd, &vs2, &zimm5, &opt, &vm})); + + #define DISASM_OPIV_V___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_S___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vs", match_##name##_vs, mask_##name##_vs, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_W___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".wv", match_##name##_wv, mask_##name##_wv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".wx", match_##name##_wx, mask_##name##_wx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); + + #define DISASM_OPIV_M___INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".mm", match_##name##_mm, mask_##name##_mm, \ + {&vd, &vs2, &vs1})); + + #define DISASM_OPIV__X__INSN(name, sign) \ + add_insn(new disasm_insn_t(#name ".vx", match_##name##_vx, mask_##name##_vx, \ + {&vd, &vs2, &xrs1, &opt, &vm})); + + #define DISASM_OPIV_VXIM_INSN(name, sign, has_vm) \ + add_insn(new disasm_insn_t(#name ".vvm", match_##name##_vvm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vxm", match_##name##_vxm, \ + mask_##name##_vxm | 
mask_vm, \ + {&vd, &vs2, &xrs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vim", match_##name##_vim, \ + mask_##name##_vim | mask_vm, \ + {&vd, &vs2, &v_simm5, &v0})); \ + if (has_vm) { \ + add_insn(new disasm_insn_t(#name ".vv", \ + match_##name##_vvm | mask_vm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1})); \ + add_insn(new disasm_insn_t(#name ".vx", \ + match_##name##_vxm | mask_vm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1})); \ + add_insn(new disasm_insn_t(#name ".vi", \ + match_##name##_vim | mask_vm, \ + mask_##name##_vim | mask_vm, \ + {&vd, &vs2, &v_simm5})); \ + } + + #define DISASM_OPIV_VX_M_INSN(name, sign, has_vm) \ + add_insn(new disasm_insn_t(#name ".vvm", match_##name##_vvm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1, &v0})); \ + add_insn(new disasm_insn_t(#name ".vxm", match_##name##_vxm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1, &v0})); \ + if (has_vm) { \ + add_insn(new disasm_insn_t(#name ".vv", \ + match_##name##_vvm | mask_vm, \ + mask_##name##_vvm | mask_vm, \ + {&vd, &vs2, &vs1})); \ + add_insn(new disasm_insn_t(#name ".vx", \ + match_##name##_vxm | mask_vm, \ + mask_##name##_vxm | mask_vm, \ + {&vd, &vs2, &xrs1})); \ + } \ + + //OPFVV/OPFVF + //0b00_0000 + DISASM_OPIV_VXI_INSN(vadd, 1, v); + DISASM_OPIV_VX__INSN(vsub, 1); + DISASM_OPIV__XI_INSN(vrsub, 1); + DISASM_OPIV_VX__INSN(vminu, 0); + DISASM_OPIV_VX__INSN(vmin, 1); + DISASM_OPIV_VX__INSN(vmaxu, 1); + DISASM_OPIV_VX__INSN(vmax, 0); + DISASM_OPIV_VXI_INSN(vand, 1, v); + DISASM_OPIV_VXI_INSN(vor, 1, v); + DISASM_OPIV_VXI_INSN(vxor, 1, v); + DISASM_OPIV_VXI_INSN(vrgather, 0, v); + DISASM_OPIV_V___INSN(vrgatherei16, 0); + DISASM_OPIV__XI_INSN(vslideup, 0); + DISASM_OPIV__XI_INSN(vslidedown, 0); + + //0b01_0000 + //DISASM_OPIV_VXIM_INSN(vadc, 1, 0); + DISASM_OPIV_VXIM_INSN(vmadc, 1, 1); + //DISASM_OPIV_VX_M_INSN(vsbc, 1, 0); + DISASM_OPIV_VX_M_INSN(vmsbc, 1, 1); + DISASM_OPIV_VXIM_INSN(vmerge, 1, 0); + DISASM_INSN("vmv.v.i", vmv_v_i, 0, 
{&vd, &v_simm5}); + DISASM_INSN("vmv.v.v", vmv_v_v, 0, {&vd, &vs1}); + DISASM_INSN("vmv.v.x", vmv_v_x, 0, {&vd, &xrs1}); + DISASM_OPIV_VXI_INSN(vmseq, 1, v); + DISASM_OPIV_VXI_INSN(vmsne, 1, v); + DISASM_OPIV_VX__INSN(vmsltu, 0); + DISASM_OPIV_VX__INSN(vmslt, 1); + DISASM_OPIV_VXI_INSN(vmsleu, 0, v); + DISASM_OPIV_VXI_INSN(vmsle, 1, v); + DISASM_OPIV__XI_INSN(vmsgtu, 0); + DISASM_OPIV__XI_INSN(vmsgt, 1); + + //0b10_0000 + DISASM_OPIV_VXI_INSN(vsaddu, 0, v); + DISASM_OPIV_VXI_INSN(vsadd, 1, v); + DISASM_OPIV_VX__INSN(vssubu, 0); + DISASM_OPIV_VX__INSN(vssub, 1); + DISASM_OPIV_VXI_INSN(vsll, 1, v); + DISASM_INSN("vmv1r.v", vmv1r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv2r.v", vmv2r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv4r.v", vmv4r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv8r.v", vmv8r_v, 0, {&vd, &vs2}); + DISASM_OPIV_VX__INSN(vsmul, 1); + DISASM_OPIV_VXI_INSN(vsrl, 0, v); + DISASM_OPIV_VXI_INSN(vsra, 0, v); + DISASM_OPIV_VXI_INSN(vssrl, 0, v); + DISASM_OPIV_VXI_INSN(vssra, 0, v); + DISASM_OPIV_VXI_INSN(vnsrl, 0, w); + DISASM_OPIV_VXI_INSN(vnsra, 0, w); + DISASM_OPIV_VXI_INSN(vnclipu, 0, w); + DISASM_OPIV_VXI_INSN(vnclip, 0, w); + + //0b11_0000 + DISASM_OPIV_S___INSN(vwredsumu, 0); + DISASM_OPIV_S___INSN(vwredsum, 1); + DISASM_OPIV_V___INSN(vdotu, 0); + DISASM_OPIV_V___INSN(vdot, 1); + + //OPMVV/OPMVX + //0b00_0000 + //DISASM_OPIV_VX__INSN(vaaddu, 0); + DISASM_OPIV_VX__INSN(vaadd, 0); + //DISASM_OPIV_VX__INSN(vasubu, 0); + DISASM_OPIV_VX__INSN(vasub, 0); + + DISASM_OPIV_S___INSN(vredsum, 1); + DISASM_OPIV_S___INSN(vredand, 1); + DISASM_OPIV_S___INSN(vredor, 1); + DISASM_OPIV_S___INSN(vredxor, 1); + DISASM_OPIV_S___INSN(vredminu, 0); + DISASM_OPIV_S___INSN(vredmin, 1); + DISASM_OPIV_S___INSN(vredmaxu, 0); + DISASM_OPIV_S___INSN(vredmax, 1); + //DISASM_OPIV__X__INSN(vslide1up, 1); + DISASM_OPIV__X__INSN(vslide1down,1); + + //0b01_0000 + //VWXUNARY0 + DISASM_INSN("vmv.x.s", vmv_x_s, 0, {&xrd, &vs2}); + DISASM_INSN("vpopc.m", vpopc_m, 0, {&xrd, &vs2, &opt, &vm}); + 
DISASM_INSN("vfirst.m", vfirst_m, 0, {&xrd, &vs2, &opt, &vm}); + + //VRXUNARY0 + DISASM_INSN("vmv.s.x", vmv_s_x, 0, {&vd, &xrs1}); + + //VXUNARY0 + DISASM_INSN("vzext.vf2", vzext_vf2, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf2", vsext_vf2, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vzext.vf4", vzext_vf4, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf4", vsext_vf4, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vzext.vf8", vzext_vf8, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vsext.vf8", vsext_vf8, 0, {&vd, &vs2, &opt, &vm}); + + //VMUNARY0 + DISASM_INSN("vmsbf.m", vmsbf_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vmsof.m", vmsof_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vmsif.m", vmsif_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("viota.m", viota_m, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vid.v", vid_v, 0, {&vd, &opt, &vm}); + + DISASM_INSN("vid.v", vid_v, 0, {&vd, &opt, &vm}); + + DISASM_INSN("vcompress.vm", vcompress_vm, 0, {&vd, &vs2, &vs1}); + + DISASM_OPIV_M___INSN(vmandnot, 1); + DISASM_OPIV_M___INSN(vmand, 1); + DISASM_OPIV_M___INSN(vmor, 1); + DISASM_OPIV_M___INSN(vmxor, 1); + DISASM_OPIV_M___INSN(vmornot, 1); + DISASM_OPIV_M___INSN(vmnand, 1); + DISASM_OPIV_M___INSN(vmnor, 1); + DISASM_OPIV_M___INSN(vmxnor, 1); + + //0b10_0000 + //DISASM_OPIV_VX__INSN(vdivu, 0); + DISASM_OPIV_VX__INSN(vdiv, 1); + DISASM_OPIV_VX__INSN(vremu, 0); + DISASM_OPIV_VX__INSN(vrem, 1); + //DISASM_OPIV_VX__INSN(vmulhu, 0); + DISASM_OPIV_VX__INSN(vmul, 1); + //DISASM_OPIV_VX__INSN(vmulhsu, 0); + DISASM_OPIV_VX__INSN(vmulh, 1); + DISASM_OPIV_VX__INSN(vmadd, 1); + DISASM_OPIV_VX__INSN(vnmsub, 1); + DISASM_OPIV_VX__INSN(vmacc, 1); + DISASM_OPIV_VX__INSN(vnmsac, 1); + + //0b11_0000 + DISASM_OPIV_VX__INSN(vwaddu, 0); + DISASM_OPIV_VX__INSN(vwadd, 1); + DISASM_OPIV_VX__INSN(vwsubu, 0); + DISASM_OPIV_VX__INSN(vwsub, 1); + DISASM_OPIV_W___INSN(vwaddu, 0); + DISASM_OPIV_W___INSN(vwadd, 1); + DISASM_OPIV_W___INSN(vwsubu, 0); + DISASM_OPIV_W___INSN(vwsub, 1); + 
DISASM_OPIV_VX__INSN(vwmulu, 0); + DISASM_OPIV_VX__INSN(vwmulsu, 0); + DISASM_OPIV_VX__INSN(vwmul, 1); + DISASM_OPIV_VX__INSN(vwmaccu, 0); + DISASM_OPIV_VX__INSN(vwmacc, 1); + DISASM_OPIV__X__INSN(vwmaccus, 1); + DISASM_OPIV_VX__INSN(vwmaccsu, 0); + + #undef DISASM_OPIV_VXI_INSN + #undef DISASM_OPIV_VX__INSN + #undef DISASM_OPIV__XI_INSN + #undef DISASM_OPIV_V___INSN + #undef DISASM_OPIV_S___INSN + #undef DISASM_OPIV_W___INSN + #undef DISASM_OPIV_M___INSN + #undef DISASM_OPIV__X__INSN + #undef DISASM_OPIV_VXIM_INSN + #undef DISASM_OPIV_VX_M_INSN + + #define DISASM_OPIV_VF_INSN(name) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_OPIV_WF_INSN(name) \ + add_insn(new disasm_insn_t(#name ".wv", match_##name##_wv, mask_##name##_wv, \ + {&vd, &vs2, &vs1, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name ".wf", match_##name##_wf, mask_##name##_wf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_OPIV_V__INSN(name) \ + add_insn(new disasm_insn_t(#name ".vv", match_##name##_vv, mask_##name##_vv, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV_S__INSN(name) \ + add_insn(new disasm_insn_t(#name ".vs", match_##name##_vs, mask_##name##_vs, \ + {&vd, &vs2, &vs1, &opt, &vm})); + + #define DISASM_OPIV__F_INSN(name) \ + add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \ + {&vd, &vs2, &frs1, &opt, &vm})); \ + + #define DISASM_VFUNARY0_INSN(name, suf) \ + add_insn(new disasm_insn_t(#name "cvt.xu.f." #suf, \ + match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.x.f." #suf, \ + match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.f.xu." 
#suf, \ + match_##name##cvt_f_xu_##suf, mask_##name##cvt_f_xu_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.f.x." #suf, \ + match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.rtz.xu.f." #suf, \ + match_##name##cvt_rtz_xu_f_##suf, mask_##name##cvt_rtz_xu_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + add_insn(new disasm_insn_t(#name "cvt.rtz.x.f." #suf, \ + match_##name##cvt_rtz_x_f_##suf, mask_##name##cvt_rtz_x_f_##suf, \ + {&vd, &vs2, &opt, &vm})); \ + + //OPFVV/OPFVF + //0b00_0000 + DISASM_OPIV_VF_INSN(vfadd); + DISASM_OPIV_S__INSN(vfredsum); + DISASM_OPIV_VF_INSN(vfsub); + DISASM_OPIV_S__INSN(vfredosum); + DISASM_OPIV_VF_INSN(vfmin); + DISASM_OPIV_S__INSN(vfredmin); + DISASM_OPIV_VF_INSN(vfmax); + DISASM_OPIV_S__INSN(vfredmax); + DISASM_OPIV_VF_INSN(vfsgnj); + DISASM_OPIV_VF_INSN(vfsgnjn); + DISASM_OPIV_VF_INSN(vfsgnjx); + DISASM_INSN("vfmv.f.s", vfmv_f_s, 0, {&frd, &vs2}); + DISASM_INSN("vfmv.s.f", vfmv_s_f, mask_vfmv_s_f, {&vd, &frs1}); + DISASM_OPIV__F_INSN(vfslide1up); + DISASM_OPIV__F_INSN(vfslide1down); + + //0b01_0000 + DISASM_INSN("vfmerge.vfm", vfmerge_vfm, 0, {&vd, &vs2, &frs1, &v0}); + DISASM_INSN("vfmv.v.f", vfmv_v_f, 0, {&vd, &frs1}); + DISASM_OPIV_VF_INSN(vmfeq); + DISASM_OPIV_VF_INSN(vmfle); + DISASM_OPIV_VF_INSN(vmflt); + DISASM_OPIV_VF_INSN(vmfne); + DISASM_OPIV__F_INSN(vmfgt); + DISASM_OPIV__F_INSN(vmfge); + + //0b10_0000 + DISASM_OPIV_VF_INSN(vfdiv); + DISASM_OPIV__F_INSN(vfrdiv); + + //vfunary0 + //DISASM_VFUNARY0_INSN(vf, v); + + DISASM_VFUNARY0_INSN(vfw, v); + DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm}); + + DISASM_VFUNARY0_INSN(vfn, w); + DISASM_INSN("vfncvt.f.f.w", vfncvt_f_f_w, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfncvt.rod.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm}); + + //vfunary1 + DISASM_INSN("vfsqrt.v", vfsqrt_v, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfrsqrte7.v", vfrsqrte7_v, 0, 
{&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfrece7.v", vfrece7_v, 0, {&vd, &vs2, &opt, &vm}); + DISASM_INSN("vfclass.v", vfclass_v, 0, {&vd, &vs2, &opt, &vm}); + + DISASM_OPIV_VF_INSN(vfmul); + DISASM_OPIV__F_INSN(vfrsub); + DISASM_OPIV_VF_INSN(vfmadd); + DISASM_OPIV_VF_INSN(vfnmadd); + DISASM_OPIV_VF_INSN(vfmsub); + DISASM_OPIV_VF_INSN(vfnmsub); + DISASM_OPIV_VF_INSN(vfmacc); + DISASM_OPIV_VF_INSN(vfnmacc); + DISASM_OPIV_VF_INSN(vfmsac); + DISASM_OPIV_VF_INSN(vfnmsac); + + //0b11_0000 + DISASM_OPIV_VF_INSN(vfwadd); + DISASM_OPIV_S__INSN(vfwredsum); + DISASM_OPIV_VF_INSN(vfwsub); + DISASM_OPIV_S__INSN(vfwredosum); + DISASM_OPIV_WF_INSN(vfwadd); + DISASM_OPIV_WF_INSN(vfwsub); + DISASM_OPIV_VF_INSN(vfwmul); + DISASM_OPIV_V__INSN(vfdot); + DISASM_OPIV_VF_INSN(vfwmacc); + DISASM_OPIV_VF_INSN(vfwnmacc); + DISASM_OPIV_VF_INSN(vfwmsac); + DISASM_OPIV_VF_INSN(vfwnmsac); + + #undef DISASM_OPIV_VF_INSN + #undef DISASM_OPIV_V__INSN + #undef DISASM_OPIV__F_INSN + #undef DISASM_OPIV_S__INSN + #undef DISASM_OPIV_W__INSN + #undef DISASM_VFUNARY0_INSN + + // vector amo + std::vector v_fmt_amo_wd = {&vd, &v_address, &vs2, &vd, &opt, &vm}; + std::vector v_fmt_amo = {&x0, &v_address, &vs2, &vd, &opt, &vm}; + for (size_t elt = 0; elt <= 3; ++elt) { + const custom_fmt_t template_insn[] = { + {match_vamoaddei8_v | mask_wd, mask_vamoaddei8_v | mask_wd, + "%sei%d.v", v_fmt_amo_wd}, + {match_vamoaddei8_v, mask_vamoaddei8_v | mask_wd, + "%sei%d.v", v_fmt_amo}, + }; + std::pair amo_map[] = { + {"vamoswap", 0x01ul << 27}, + {"vamoadd", 0x00ul << 27}, + {"vamoxor", 0x04ul << 27}, + {"vamoand", 0x0cul << 27}, + {"vamoor", 0x08ul << 27}, + {"vamomin", 0x10ul << 27}, + {"vamomax", 0x14ul << 27}, + {"vamominu", 0x18ul << 27}, + {"vamomaxu", 0x1cul << 27}}; + const reg_t elt_map[] = {0x0ul << 12, 0x5ul << 12, + 0x6ul <<12, 0x7ul << 12}; + + for (size_t idx = 0; idx < sizeof(amo_map) / sizeof(amo_map[0]); ++idx) { + for (auto item : template_insn) { + char buf[128]; + sprintf(buf, item.fmt, 
amo_map[idx].first, 8 << elt); + add_insn(new disasm_insn_t(buf, + item.match | amo_map[idx].second | elt_map[elt], + item.mask, + item.arg)); + } + } + } + +#endif + + if (xlen == 32) { + DISASM_INSN("c.flw", c_flw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); + DISASM_INSN("c.flwsp", c_flwsp, 0, {&frd, &rvc_lwsp_address}); + DISASM_INSN("c.fsw", c_fsw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); + DISASM_INSN("c.fswsp", c_fswsp, 0, {&rvc_fp_rs2, &rvc_swsp_address}); + DISASM_INSN("c.jal", c_jal, 0, {&rvc_jump_target}); + } else { + DISASM_INSN("c.ld", c_ld, 0, {&rvc_rs2s, &rvc_ld_address}); + DISASM_INSN("c.ldsp", c_ldsp, 0, {&xrd, &rvc_ldsp_address}); + DISASM_INSN("c.sd", c_sd, 0, {&rvc_rs2s, &rvc_ld_address}); + DISASM_INSN("c.sdsp", c_sdsp, 0, {&rvc_rs2, &rvc_sdsp_address}); + DISASM_INSN("c.addiw", c_addiw, 0, {&xrd, &rvc_imm}); + } + + // Xpulpimg extension + DEFINE_PLOAD_IRPOST(p_lb_irpost); + DEFINE_PLOAD_IRPOST(p_lbu_irpost); + DEFINE_PLOAD_IRPOST(p_lh_irpost); + DEFINE_PLOAD_IRPOST(p_lhu_irpost); + DEFINE_PLOAD_IRPOST(p_lw_irpost); + DEFINE_PLOAD_RRPOST(p_lb_rrpost); + DEFINE_PLOAD_RRPOST(p_lbu_rrpost); + DEFINE_PLOAD_RRPOST(p_lh_rrpost); + DEFINE_PLOAD_RRPOST(p_lhu_rrpost); + DEFINE_PLOAD_RRPOST(p_lw_rrpost); + DEFINE_PLOAD_RR(p_lb_rr); + DEFINE_PLOAD_RR(p_lbu_rr); + DEFINE_PLOAD_RR(p_lh_rr); + DEFINE_PLOAD_RR(p_lhu_rr); + DEFINE_PLOAD_RR(p_lw_rr); + DEFINE_PSTORE_IRPOST(p_sb_irpost); + DEFINE_PSTORE_IRPOST(p_sh_irpost); + DEFINE_PSTORE_IRPOST(p_sw_irpost); + DEFINE_PSTORE_RRPOST(p_sb_rrpost); + DEFINE_PSTORE_RRPOST(p_sh_rrpost); + DEFINE_PSTORE_RRPOST(p_sw_rrpost); + DEFINE_PSTORE_RR(p_sb_rr); + DEFINE_PSTORE_RR(p_sh_rr); + DEFINE_PSTORE_RR(p_sw_rr); + DEFINE_R1TYPE(p_abs); + DEFINE_RTYPE(p_slet); + DEFINE_RTYPE(p_sletu); + DEFINE_RTYPE(p_min); + DEFINE_RTYPE(p_minu); + DEFINE_RTYPE(p_max); + DEFINE_RTYPE(p_maxu); + DEFINE_PI0TYPE(p_clip); + DEFINE_PI0TYPE(p_clipu); + DEFINE_RTYPE(p_clipr); + DEFINE_RTYPE(p_clipur); + DEFINE_PBTYPE(p_beqimm); + 
DEFINE_PBTYPE(p_bneimm); + DEFINE_RTYPE(p_mac); + DEFINE_RTYPE(p_msu); + + // xpulpbitop + DEFINE_PLUI2TYPE(p_extract); + DEFINE_RTYPE(p_extractr); + DEFINE_PLUI2TYPE(p_extractu); + DEFINE_RTYPE(p_extractur); + DEFINE_PLUI2TYPE(p_insert); + DEFINE_RTYPE(p_insertr); + DEFINE_PLUI2TYPE(p_bset); + DEFINE_RTYPE(p_bsetr); + DEFINE_PLUI2TYPE(p_bclr); + DEFINE_RTYPE(p_bclrr); + // xpulpbitopsmall (subset of xpulpbitop) + DEFINE_R1TYPE(p_exths); + DEFINE_R1TYPE(p_exthz); + DEFINE_R1TYPE(p_extbs); + DEFINE_R1TYPE(p_extbz); + DEFINE_RTYPE(p_ror); + DEFINE_R1TYPE(p_ff1); + DEFINE_R1TYPE(p_fl1); + DEFINE_R1TYPE(p_clb); + DEFINE_R1TYPE(p_cnt); + + // xpulpbitrev + DEFINE_PLUI2TYPE(p_bitrev); + + // xpulpmulrnhi + DEFINE_PR2LUITYPE(p_muluN); + DEFINE_PR2LUITYPE(p_mulhhuN); + DEFINE_PR2LUITYPE(p_mulsN); + DEFINE_PR2LUITYPE(p_mulhhsN); + DEFINE_PR2LUITYPE(p_muluRN); + DEFINE_PR2LUITYPE(p_mulhhuRN); + DEFINE_PR2LUITYPE(p_mulsRN); + DEFINE_PR2LUITYPE(p_mulhhsRN); + + // xpulpmacrnhi + DEFINE_PR2LUITYPE(p_macuN); + DEFINE_PR2LUITYPE(p_machhuN); + DEFINE_PR2LUITYPE(p_macsN); + DEFINE_PR2LUITYPE(p_machhsN); + DEFINE_PR2LUITYPE(p_macuRN); + DEFINE_PR2LUITYPE(p_machhuRN); + DEFINE_PR2LUITYPE(p_macsRN); + DEFINE_PR2LUITYPE(p_machhsRN); + + // xpulpaddsubrn + DEFINE_PR2LUITYPE(p_addN); + DEFINE_PR2LUITYPE(p_adduN); + DEFINE_PR2LUITYPE(p_addRN); + DEFINE_PR2LUITYPE(p_adduRN); + DEFINE_PR2LUITYPE(p_subN); + DEFINE_PR2LUITYPE(p_subuN); + DEFINE_PR2LUITYPE(p_subRN); + DEFINE_PR2LUITYPE(p_subuRN); + DEFINE_RTYPE(p_addNr); + DEFINE_RTYPE(p_adduNr); + DEFINE_RTYPE(p_addRNr); + DEFINE_RTYPE(p_adduRNr); + DEFINE_RTYPE(p_subNr); + DEFINE_RTYPE(p_subuNr); + DEFINE_RTYPE(p_subRNr); + DEFINE_RTYPE(p_subuRNr); + + // xpulphwloop + DISASM_INSN("lp_starti", lp_starti, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_endi", lp_endi, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_counti", lp_counti, 0, {&p_loop, &p_uimmL}); + DISASM_INSN("lp_count", lp_count, 0, {&p_loop, &xrs1}); + DISASM_INSN("lp_setup", lp_setup, 
0, {&p_loop, &xrs1, &p_uimmL}); + DISASM_INSN("lp_setupi", lp_setupi, 0, {&p_loop, &p_uimmL, &p_uimmS}); + + // xpulpvect + DEFINE_RTYPE(pv_add_h); + DEFINE_RTYPE(pv_add_sc_h); + DEFINE_PI1STYPE(pv_add_sci_h); + DEFINE_RTYPE(pv_add_b); + DEFINE_RTYPE(pv_add_sc_b); + DEFINE_PI1STYPE(pv_add_sci_b); + DEFINE_RTYPE(pv_add_h_div2); + DEFINE_RTYPE(pv_add_h_div4); + DEFINE_RTYPE(pv_add_h_div8); + DEFINE_RTYPE(pv_sub_h); + DEFINE_RTYPE(pv_sub_sc_h); + DEFINE_PI1STYPE(pv_sub_sci_h); + DEFINE_RTYPE(pv_sub_b); + DEFINE_RTYPE(pv_sub_sc_b); + DEFINE_PI1STYPE(pv_sub_sci_b); + DEFINE_RTYPE(pv_sub_h_div2); + DEFINE_RTYPE(pv_sub_h_div2); + DEFINE_RTYPE(pv_sub_h_div2); + DEFINE_RTYPE(pv_avg_h); + DEFINE_RTYPE(pv_avg_sc_h); + DEFINE_PI1STYPE(pv_avg_sci_h); + DEFINE_RTYPE(pv_avg_b); + DEFINE_RTYPE(pv_avg_sc_b); + DEFINE_PI1STYPE(pv_avg_sci_b); + DEFINE_RTYPE(pv_avgu_h); + DEFINE_RTYPE(pv_avgu_sc_h); + DEFINE_PI1ZTYPE(pv_avgu_sci_h); + DEFINE_RTYPE(pv_avgu_b); + DEFINE_RTYPE(pv_avgu_sc_b); + DEFINE_PI1ZTYPE(pv_avgu_sci_b); + + DEFINE_RTYPE(pv_min_h); + DEFINE_RTYPE(pv_min_sc_h); + DEFINE_PI1STYPE(pv_min_sci_h); + DEFINE_RTYPE(pv_min_b); + DEFINE_RTYPE(pv_min_sc_b); + DEFINE_PI1STYPE(pv_min_sci_b); + DEFINE_RTYPE(pv_minu_h); + DEFINE_RTYPE(pv_minu_sc_h); + DEFINE_PI1ZTYPE(pv_minu_sci_h); + DEFINE_RTYPE(pv_minu_b); + DEFINE_RTYPE(pv_minu_sc_b); + DEFINE_PI1ZTYPE(pv_minu_sci_b); + DEFINE_RTYPE(pv_max_h); + DEFINE_RTYPE(pv_max_sc_h); + DEFINE_PI1STYPE(pv_max_sci_h); + DEFINE_RTYPE(pv_max_b); + DEFINE_RTYPE(pv_max_sc_b); + DEFINE_PI1STYPE(pv_max_sci_b); + DEFINE_RTYPE(pv_maxu_h); + DEFINE_RTYPE(pv_maxu_sc_h); + DEFINE_PI1ZTYPE(pv_maxu_sci_h); + DEFINE_RTYPE(pv_maxu_b); + DEFINE_RTYPE(pv_maxu_sc_b); + DEFINE_PI1ZTYPE(pv_maxu_sci_b); + DEFINE_PI1ZTYPE(pv_and_sci_b); + DEFINE_R1TYPE(pv_abs_h); + DEFINE_R1TYPE(pv_abs_b); + + DEFINE_RTYPE(pv_srl_h); + DEFINE_RTYPE(pv_srl_sc_h); + DEFINE_PI1ZTYPE(pv_srl_sci_h); + DEFINE_RTYPE(pv_srl_b); + DEFINE_RTYPE(pv_srl_sc_b); + 
DEFINE_PI1ZTYPE(pv_srl_sci_b); + DEFINE_RTYPE(pv_sra_h); + DEFINE_RTYPE(pv_sra_sc_h); + DEFINE_PI1ZTYPE(pv_sra_sci_h); + DEFINE_RTYPE(pv_sra_b); + DEFINE_RTYPE(pv_sra_sc_b); + DEFINE_PI1ZTYPE(pv_sra_sci_b); + DEFINE_RTYPE(pv_sll_h); + DEFINE_RTYPE(pv_sll_sc_h); + DEFINE_PI1ZTYPE(pv_sll_sci_h); + DEFINE_RTYPE(pv_sll_b); + DEFINE_RTYPE(pv_sll_sc_b); + DEFINE_PI1ZTYPE(pv_sll_sci_b); + DEFINE_RTYPE(pv_or_h); + DEFINE_RTYPE(pv_or_sc_h); + DEFINE_PI1ZTYPE(pv_or_sci_h); + DEFINE_RTYPE(pv_or_b); + DEFINE_RTYPE(pv_or_sc_b); + DEFINE_PI1ZTYPE(pv_or_sci_b); + DEFINE_RTYPE(pv_xor_h); + DEFINE_RTYPE(pv_xor_sc_h); + DEFINE_PI1ZTYPE(pv_xor_sci_h); + DEFINE_RTYPE(pv_xor_b); + DEFINE_RTYPE(pv_xor_sc_b); + DEFINE_PI1ZTYPE(pv_xor_sci_b); + DEFINE_RTYPE(pv_and_h); + DEFINE_RTYPE(pv_and_sc_h); + DEFINE_PI1ZTYPE(pv_and_sci_h); + DEFINE_RTYPE(pv_and_b); + DEFINE_RTYPE(pv_and_sc_b); + + DEFINE_PI1ZTYPE(pv_extract_h); + DEFINE_PI1ZTYPE(pv_extract_b); + DEFINE_PI1ZTYPE(pv_extractu_h); + DEFINE_PI1ZTYPE(pv_extractu_b); + DEFINE_PI1ZTYPE(pv_insert_h); + DEFINE_PI1ZTYPE(pv_insert_b); + + DEFINE_RTYPE(pv_dotup_h); + DEFINE_RTYPE(pv_dotup_sc_h); + DEFINE_PI1ZTYPE(pv_dotup_sci_h); + DEFINE_RTYPE(pv_dotup_b); + DEFINE_RTYPE(pv_dotup_sc_b); + DEFINE_PI1ZTYPE(pv_dotup_sci_b); + DEFINE_RTYPE(pv_dotusp_h); + DEFINE_RTYPE(pv_dotusp_sc_h); + DEFINE_PI1STYPE(pv_dotusp_sci_h); + DEFINE_RTYPE(pv_dotusp_b); + DEFINE_RTYPE(pv_dotusp_sc_b); + DEFINE_PI1STYPE(pv_dotusp_sci_b); + DEFINE_RTYPE(pv_dotsp_h); + DEFINE_RTYPE(pv_dotsp_sc_h); + DEFINE_PI1STYPE(pv_dotsp_sci_h); + DEFINE_RTYPE(pv_dotsp_b); + DEFINE_RTYPE(pv_dotsp_sc_b); + DEFINE_PI1STYPE(pv_dotsp_sci_b); + DEFINE_RTYPE(pv_sdotup_h); + DEFINE_RTYPE(pv_sdotup_sc_h); + DEFINE_PI1ZTYPE(pv_sdotup_sci_h); + DEFINE_RTYPE(pv_sdotup_b); + DEFINE_RTYPE(pv_sdotup_sc_b); + DEFINE_PI1ZTYPE(pv_sdotup_sci_b); + DEFINE_RTYPE(pv_sdotusp_h); + DEFINE_RTYPE(pv_sdotusp_sc_h); + DEFINE_PI1STYPE(pv_sdotusp_sci_h); + DEFINE_RTYPE(pv_sdotusp_b); + 
DEFINE_RTYPE(pv_sdotusp_sc_b); + DEFINE_PI1STYPE(pv_sdotusp_sci_b); + DEFINE_RTYPE(pv_sdotsp_h); + DEFINE_RTYPE(pv_sdotsp_sc_h); + DEFINE_PI1STYPE(pv_sdotsp_sci_h); + DEFINE_RTYPE(pv_sdotsp_b); + DEFINE_RTYPE(pv_sdotsp_sc_b); + DEFINE_PI1STYPE(pv_sdotsp_sci_b); + + DEFINE_RTYPE(pv_cmpeq_h); + DEFINE_RTYPE(pv_cmpeq_sc_h); + DEFINE_PI1ZTYPE(pv_cmpeq_sci_h); + DEFINE_RTYPE(pv_cmpeq_b); + DEFINE_RTYPE(pv_cmpeq_sc_b); + DEFINE_PI1ZTYPE(pv_cmpeq_sci_b); + DEFINE_RTYPE(pv_cmpne_h); + DEFINE_RTYPE(pv_cmpne_sc_h); + DEFINE_PI1ZTYPE(pv_cmpne_sci_h); + DEFINE_RTYPE(pv_cmpne_b); + DEFINE_RTYPE(pv_cmpne_sc_b); + DEFINE_PI1ZTYPE(pv_cmpne_sci_b); + DEFINE_RTYPE(pv_cmpgt_h); + DEFINE_RTYPE(pv_cmpgt_sc_h); + DEFINE_PI1STYPE(pv_cmpgt_sci_h); + DEFINE_RTYPE(pv_cmpgt_b); + DEFINE_RTYPE(pv_cmpgt_sc_b); + DEFINE_PI1STYPE(pv_cmpgt_sci_b); + DEFINE_RTYPE(pv_cmpge_h); + DEFINE_RTYPE(pv_cmpge_sc_h); + DEFINE_PI1STYPE(pv_cmpge_sci_h); + DEFINE_RTYPE(pv_cmpge_b); + DEFINE_RTYPE(pv_cmpge_sc_b); + DEFINE_PI1STYPE(pv_cmpge_sci_b); + DEFINE_RTYPE(pv_cmplt_h); + DEFINE_RTYPE(pv_cmplt_sc_h); + DEFINE_PI1STYPE(pv_cmplt_sci_h); + DEFINE_RTYPE(pv_cmplt_b); + DEFINE_RTYPE(pv_cmplt_sc_b); + DEFINE_PI1STYPE(pv_cmplt_sci_b); + DEFINE_RTYPE(pv_cmple_h); + DEFINE_RTYPE(pv_cmple_sc_h); + DEFINE_PI1STYPE(pv_cmple_sci_h); + DEFINE_RTYPE(pv_cmple_b); + DEFINE_RTYPE(pv_cmple_sc_b); + DEFINE_PI1STYPE(pv_cmple_sci_b); + DEFINE_RTYPE(pv_cmpgtu_h); + DEFINE_RTYPE(pv_cmpgtu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpgtu_sci_h); + DEFINE_RTYPE(pv_cmpgtu_b); + DEFINE_RTYPE(pv_cmpgtu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpgtu_sci_b); + DEFINE_RTYPE(pv_cmpgeu_h); + DEFINE_RTYPE(pv_cmpgeu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpgeu_sci_h); + DEFINE_RTYPE(pv_cmpgeu_b); + DEFINE_RTYPE(pv_cmpgeu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpgeu_sci_b); + DEFINE_RTYPE(pv_cmpltu_h); + DEFINE_RTYPE(pv_cmpltu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpltu_sci_h); + DEFINE_RTYPE(pv_cmpltu_b); + DEFINE_RTYPE(pv_cmpltu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpltu_sci_b); + 
DEFINE_RTYPE(pv_cmpleu_h); + DEFINE_RTYPE(pv_cmpleu_sc_h); + DEFINE_PI1ZTYPE(pv_cmpleu_sci_h); + DEFINE_RTYPE(pv_cmpleu_b); + DEFINE_RTYPE(pv_cmpleu_sc_b); + DEFINE_PI1ZTYPE(pv_cmpleu_sci_b); + + // xpulpvectcomplex + DEFINE_R1TYPE(pv_cplxconj_h); + DEFINE_RTYPE(pv_subrotmj_h); + DEFINE_RTYPE(pv_subrotmj_h_div2); + DEFINE_RTYPE(pv_subrotmj_h_div4); + DEFINE_RTYPE(pv_subrotmj_h_div8); + DEFINE_RTYPE(pv_cplxmul_h_r); + DEFINE_RTYPE(pv_cplxmul_h_r_div2); + DEFINE_RTYPE(pv_cplxmul_h_r_div4); + DEFINE_RTYPE(pv_cplxmul_h_r_div8); + DEFINE_RTYPE(pv_cplxmul_h_i); + DEFINE_RTYPE(pv_cplxmul_h_i_div2); + DEFINE_RTYPE(pv_cplxmul_h_i_div4); + DEFINE_RTYPE(pv_cplxmul_h_i_div8); + + // xpulpvectshufflepack + DEFINE_RTYPE(pv_shuffle_h); + DEFINE_PI1ZTYPE(pv_shuffle_sci_h); + DEFINE_RTYPE(pv_shuffle_b); + DEFINE_PI1ZTYPE(pv_shufflei0_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei1_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei2_sci_b); + DEFINE_PI1ZTYPE(pv_shufflei3_sci_b); + DEFINE_RTYPE(pv_shuffle2_h); + DEFINE_RTYPE(pv_shuffle2_b); + DEFINE_RTYPE(pv_pack); + DEFINE_RTYPE(pv_pack_h); + DEFINE_RTYPE(pv_packhi_b); + DEFINE_RTYPE(pv_packlo_b); + + // provide a default disassembly for all instructions as a fallback + #define DECLARE_INSN(code, match, mask) \ + add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); + #include "encoding.h" + #undef DECLARE_INSN +} + +const disasm_insn_t* disassembler_t::lookup(insn_t insn) const +{ + size_t idx = insn.bits() % HASH_SIZE; + for (size_t j = 0; j < chain[idx].size(); j++) + if(*chain[idx][j] == insn) + return chain[idx][j]; + + idx = HASH_SIZE; + for (size_t j = 0; j < chain[idx].size(); j++) + if(*chain[idx][j] == insn) + return chain[idx][j]; + + return NULL; +} + +void NOINLINE disassembler_t::add_insn(disasm_insn_t* insn) +{ + size_t idx = HASH_SIZE; + if (insn->get_mask() % HASH_SIZE == HASH_SIZE - 1) + idx = insn->get_match() % HASH_SIZE; + chain[idx].push_back(insn); +} + +disassembler_t::~disassembler_t() +{ + for (size_t i = 0; i < 
HASH_SIZE+1; i++) + for (size_t j = 0; j < chain[i].size(); j++) + delete chain[i][j]; +} diff --git a/disasm/disasm.mk.in b/disasm/disasm.mk.in new file mode 100644 index 0000000000..039a717f90 --- /dev/null +++ b/disasm/disasm.mk.in @@ -0,0 +1,7 @@ +disasm_CFLAGS = -fPIC + +disasm_srcs = \ + disasm.cc \ + regnames.cc \ + +disasm_install_lib = yes diff --git a/riscv/regnames.cc b/disasm/regnames.cc similarity index 75% rename from riscv/regnames.cc rename to disasm/regnames.cc index 0bf8d9c6e9..0a7fd4d22c 100644 --- a/riscv/regnames.cc +++ b/disasm/regnames.cc @@ -16,6 +16,13 @@ const char* fpr_name[] = { "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11" }; +const char* vr_name[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" +}; + const char* csr_name(int which) { switch (which) { #define DECLARE_CSR(name, number) case number: return #name; diff --git a/dummy_rocc/dummy_rocc.mk.in b/dummy_rocc/dummy_rocc.mk.in deleted file mode 100644 index 0143ffd1c3..0000000000 --- a/dummy_rocc/dummy_rocc.mk.in +++ /dev/null @@ -1,7 +0,0 @@ -dummy_rocc_subproject_deps = \ - spike_main \ - riscv \ - softfloat \ - -dummy_rocc_srcs = \ - dummy_rocc.cc \ diff --git a/fdt/fdt.ac b/fdt/fdt.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fdt/fdt.c b/fdt/fdt.c new file mode 100644 index 0000000000..d6ce7c052d --- /dev/null +++ b/fdt/fdt.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +/* + * Minimal sanity check for a read-only tree. 
fdt_ro_probe_() checks + * that the given buffer contains what appears to be a flattened + * device tree with sane information in its header. + */ +int32_t fdt_ro_probe_(const void *fdt) +{ + uint32_t totalsize = fdt_totalsize(fdt); + + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + if (totalsize < INT32_MAX) + return totalsize; + else + return -FDT_ERR_TRUNCATED; +} + +static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off) +{ + return (off >= hdrsize) && (off <= totalsize); +} + +static int check_block_(uint32_t hdrsize, uint32_t totalsize, + uint32_t base, uint32_t size) +{ + if (!check_off_(hdrsize, totalsize, base)) + return 0; /* block start out of bounds */ + if ((base + size) < base) + return 0; /* overflow */ + if (!check_off_(hdrsize, totalsize, base + size)) + return 0; /* block end out of bounds */ + return 1; +} + +size_t fdt_header_size_(uint32_t version) +{ + if (version <= 1) + return FDT_V1_SIZE; + else if (version <= 2) + return FDT_V2_SIZE; + else if (version <= 3) + return FDT_V3_SIZE; + else if (version <= 16) + return FDT_V16_SIZE; + else + return FDT_V17_SIZE; +} + +int fdt_check_header(const void *fdt) +{ + size_t hdrsize; + + if (fdt_magic(fdt) != FDT_MAGIC) + return -FDT_ERR_BADMAGIC; + hdrsize = fdt_header_size(fdt); + if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + || (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION)) + return -FDT_ERR_BADVERSION; + if (fdt_version(fdt) < fdt_last_comp_version(fdt)) + return -FDT_ERR_BADVERSION; + + if ((fdt_totalsize(fdt) < hdrsize) + || (fdt_totalsize(fdt) > INT_MAX)) + return 
-FDT_ERR_TRUNCATED; + + /* Bounds check memrsv block */ + if (!check_off_(hdrsize, fdt_totalsize(fdt), fdt_off_mem_rsvmap(fdt))) + return -FDT_ERR_TRUNCATED; + + /* Bounds check structure block */ + if (fdt_version(fdt) < 17) { + if (!check_off_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } else { + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } + + /* Bounds check strings block */ + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_strings(fdt), fdt_size_dt_strings(fdt))) + return -FDT_ERR_TRUNCATED; + + return 0; +} + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) +{ + unsigned absoffset = offset + fdt_off_dt_struct(fdt); + + if ((absoffset < offset) + || ((absoffset + len) < absoffset) + || (absoffset + len) > fdt_totalsize(fdt)) + return NULL; + + if (fdt_version(fdt) >= 0x11) + if (((offset + len) < offset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + return fdt_offset_ptr_(fdt, offset); +} + +uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) +{ + const fdt32_t *tagp, *lenp; + uint32_t tag; + int offset = startoffset; + const char *p; + + *nextoffset = -FDT_ERR_TRUNCATED; + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (!tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + *nextoffset = -FDT_ERR_BADSTRUCTURE; + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (!p) + return FDT_END; /* premature end */ + break; + + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (!lenp) + return FDT_END; /* premature end */ + /* skip-name offset, length and value */ + offset += sizeof(struct fdt_property) - FDT_TAGSIZE + + fdt32_to_cpu(*lenp); + if (fdt_version(fdt) < 0x10 && fdt32_to_cpu(*lenp) >= 
8 && + ((offset - fdt32_to_cpu(*lenp)) % 8) != 0) + offset += 4; + break; + + case FDT_END: + case FDT_END_NODE: + case FDT_NOP: + break; + + default: + return FDT_END; + } + + if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset)) + return FDT_END; /* premature end */ + + *nextoffset = FDT_TAGALIGN(offset); + return tag; +} + +int fdt_check_node_offset_(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_check_prop_offset_(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_PROP)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = fdt_check_node_offset_(fdt, offset)) < 0) + return nextoffset; + + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + if (depth && ((--(*depth)) < 0)) + return nextoffset; + break; + + case FDT_END: + if ((nextoffset >= 0) + || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) + return -FDT_ERR_NOTFOUND; + else + return nextoffset; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + +int fdt_first_subnode(const void *fdt, int offset) +{ + int depth = 0; + + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth != 1) + return -FDT_ERR_NOTFOUND; + + return offset; +} + +int fdt_next_subnode(const void *fdt, int offset) +{ + int depth = 1; + + /* + * With respect to the parent, the depth of the next subnode will be + * the same as the last. 
+ */ + do { + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth < 1) + return -FDT_ERR_NOTFOUND; + } while (depth > 1); + + return offset; +} + +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + FDT_RO_PROBE(fdt); + + if (fdt_totalsize(fdt) > bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/fdt/fdt.h b/fdt/fdt.h new file mode 100644 index 0000000000..f2e68807f2 --- /dev/null +++ b/fdt/fdt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef FDT_H +#define FDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. 
+ */ + +#ifndef __ASSEMBLY__ + +struct fdt_header { + fdt32_t magic; /* magic word FDT_MAGIC */ + fdt32_t totalsize; /* total size of DT block */ + fdt32_t off_dt_struct; /* offset to structure */ + fdt32_t off_dt_strings; /* offset to strings */ + fdt32_t off_mem_rsvmap; /* offset to memory reserve map */ + fdt32_t version; /* format version */ + fdt32_t last_comp_version; /* last compatible version */ + + /* version 2 fields below */ + fdt32_t boot_cpuid_phys; /* Which physical CPU id we're + booting on */ + /* version 3 fields below */ + fdt32_t size_dt_strings; /* size of the strings block */ + + /* version 17 fields below */ + fdt32_t size_dt_struct; /* size of the structure block */ +}; + +struct fdt_reserve_entry { + fdt64_t address; + fdt64_t size; +}; + +struct fdt_node_header { + fdt32_t tag; + char name[0]; +}; + +struct fdt_property { + fdt32_t tag; + fdt32_t len; + fdt32_t nameoff; + char data[0]; +}; + +#endif /* !__ASSEMBLY */ + +#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */ +#define FDT_TAGSIZE sizeof(fdt32_t) + +#define FDT_BEGIN_NODE 0x1 /* Start node: full name */ +#define FDT_END_NODE 0x2 /* End node */ +#define FDT_PROP 0x3 /* Property: name off, + size, content */ +#define FDT_NOP 0x4 /* nop */ +#define FDT_END 0x9 + +#define FDT_V1_SIZE (7*sizeof(fdt32_t)) +#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(fdt32_t)) +#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(fdt32_t)) +#define FDT_V16_SIZE FDT_V3_SIZE +#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(fdt32_t)) + +#endif /* FDT_H */ diff --git a/fdt/fdt.mk.in b/fdt/fdt.mk.in new file mode 100644 index 0000000000..273375efb4 --- /dev/null +++ b/fdt/fdt.mk.in @@ -0,0 +1,17 @@ +fdt_subproject_deps = \ + +fdt_hdrs = \ + fdt.h \ + libfdt.h \ + libfdt_env.h \ + +fdt_c_srcs = \ + fdt.c \ + fdt_ro.c \ + fdt_wip.c \ + fdt_sw.c \ + fdt_rw.c \ + fdt_strerror.c \ + fdt_empty_tree.c \ + fdt_addresses.c \ + fdt_overlay.c \ diff --git a/fdt/fdt_addresses.c b/fdt/fdt_addresses.c new file mode 100644 index 
0000000000..9a82cd0ba2 --- /dev/null +++ b/fdt/fdt_addresses.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2014 David Gibson + * Copyright (C) 2018 embedded brains GmbH + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_cells(const void *fdt, int nodeoffset, const char *name) +{ + const fdt32_t *c; + uint32_t val; + int len; + + c = fdt_getprop(fdt, nodeoffset, name, &len); + if (!c) + return len; + + if (len != sizeof(*c)) + return -FDT_ERR_BADNCELLS; + + val = fdt32_to_cpu(*c); + if (val > FDT_MAX_NCELLS) + return -FDT_ERR_BADNCELLS; + + return (int)val; +} + +int fdt_address_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#address-cells"); + if (val == 0) + return -FDT_ERR_BADNCELLS; + if (val == -FDT_ERR_NOTFOUND) + return 2; + return val; +} + +int fdt_size_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#size-cells"); + if (val == -FDT_ERR_NOTFOUND) + return 1; + return val; +} + +/* This function assumes that [address|size]_cells is 1 or 2 */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size) +{ + int addr_cells, size_cells, ret; + uint8_t data[sizeof(fdt64_t) * 2], *prop; + + ret = fdt_address_cells(fdt, parent); + if (ret < 0) + return ret; + addr_cells = ret; + + ret = fdt_size_cells(fdt, parent); + if (ret < 0) + return ret; + size_cells = ret; + + /* check validity of address */ + prop = data; + if (addr_cells == 1) { + if ((addr > UINT32_MAX) || ((UINT32_MAX + 1 - addr) < size)) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)addr); + } else if (addr_cells == 2) { + fdt64_st(prop, addr); + } else { + return -FDT_ERR_BADNCELLS; + } + + /* check validity of size */ + prop += addr_cells * sizeof(fdt32_t); + if (size_cells == 1) { + if (size > 
UINT32_MAX) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)size); + } else if (size_cells == 2) { + fdt64_st(prop, size); + } else { + return -FDT_ERR_BADNCELLS; + } + + return fdt_appendprop(fdt, nodeoffset, name, data, + (addr_cells + size_cells) * sizeof(fdt32_t)); +} diff --git a/fdt/fdt_empty_tree.c b/fdt/fdt_empty_tree.c new file mode 100644 index 0000000000..49d54d44b8 --- /dev/null +++ b/fdt/fdt_empty_tree.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2012 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_create_empty_tree(void *buf, int bufsize) +{ + int err; + + err = fdt_create(buf, bufsize); + if (err) + return err; + + err = fdt_finish_reservemap(buf); + if (err) + return err; + + err = fdt_begin_node(buf, ""); + if (err) + return err; + + err = fdt_end_node(buf); + if (err) + return err; + + err = fdt_finish(buf); + if (err) + return err; + + return fdt_open_into(buf, buf, bufsize); +} diff --git a/fdt/fdt_overlay.c b/fdt/fdt_overlay.c new file mode 100644 index 0000000000..be71873366 --- /dev/null +++ b/fdt/fdt_overlay.c @@ -0,0 +1,881 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2016 Free Electrons + * Copyright (C) 2016 NextThing Co. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +/** + * overlay_get_target_phandle - retrieves the target phandle of a fragment + * @fdto: pointer to the device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * + * overlay_get_target_phandle() retrieves the target phandle of an + * overlay fragment when that fragment uses a phandle (target + * property) instead of a path (target-path property). 
+ * + * returns: + * the phandle pointed by the target property + * 0, if the phandle was not found + * -1, if the phandle was malformed + */ +static uint32_t overlay_get_target_phandle(const void *fdto, int fragment) +{ + const fdt32_t *val; + int len; + + val = fdt_getprop(fdto, fragment, "target", &len); + if (!val) + return 0; + + if ((len != sizeof(*val)) || (fdt32_to_cpu(*val) == (uint32_t)-1)) + return (uint32_t)-1; + + return fdt32_to_cpu(*val); +} + +/** + * overlay_get_target - retrieves the offset of a fragment's target + * @fdt: Base device tree blob + * @fdto: Device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * @pathp: pointer which receives the path of the target (or NULL) + * + * overlay_get_target() retrieves the target offset in the base + * device tree of a fragment, no matter how the actual targeting is + * done (through a phandle or a path) + * + * returns: + * the targeted node offset in the base device tree + * Negative error code on error + */ +static int overlay_get_target(const void *fdt, const void *fdto, + int fragment, char const **pathp) +{ + uint32_t phandle; + const char *path = NULL; + int path_len = 0, ret; + + /* Try first to do a phandle based lookup */ + phandle = overlay_get_target_phandle(fdto, fragment); + if (phandle == (uint32_t)-1) + return -FDT_ERR_BADPHANDLE; + + /* no phandle, try path */ + if (!phandle) { + /* And then a path based lookup */ + path = fdt_getprop(fdto, fragment, "target-path", &path_len); + if (path) + ret = fdt_path_offset(fdt, path); + else + ret = path_len; + } else + ret = fdt_node_offset_by_phandle(fdt, phandle); + + /* + * If we haven't found either a target or a + * target-path property in a node that contains a + * __overlay__ subnode (we wouldn't be called + * otherwise), consider it a improperly written + * overlay + */ + if (ret < 0 && path_len == -FDT_ERR_NOTFOUND) + ret = -FDT_ERR_BADOVERLAY; + + /* return on error */ + if (ret < 0) + return ret; + + /* 
return pointer to path (if available) */ + if (pathp) + *pathp = path ? path : NULL; + + return ret; +} + +/** + * overlay_phandle_add_offset - Increases a phandle by an offset + * @fdt: Device tree blob containing the node + * @node: Offset of the node whose phandle property is adjusted + * @name: Name of the property to modify (phandle or linux,phandle) + * @delta: offset to apply + * + * overlay_phandle_add_offset() increments a node phandle by a given + * offset. + * + * returns: + * 0 on success. + * Negative error code on error + */ +static int overlay_phandle_add_offset(void *fdt, int node, + const char *name, uint32_t delta) +{ + const fdt32_t *val; + uint32_t adj_val; + int len; + + val = fdt_getprop(fdt, node, name, &len); + if (!val) + return len; + + if (len != sizeof(*val)) + return -FDT_ERR_BADPHANDLE; + + adj_val = fdt32_to_cpu(*val); + /* unsigned wrap-around: the shifted phandle would not fit in 32 bits */ + if ((adj_val + delta) < adj_val) + return -FDT_ERR_NOPHANDLES; + + adj_val += delta; + /* (uint32_t)-1 is refused as a resulting phandle value */ + if (adj_val == (uint32_t)-1) + return -FDT_ERR_NOPHANDLES; + + return fdt_setprop_inplace_u32(fdt, node, name, adj_val); +} + +/** + * overlay_adjust_node_phandles - Offsets the phandles of a node + * @fdto: Device tree overlay blob + * @node: Offset of the node we want to adjust + * @delta: Offset to shift the phandles of + * + * overlay_adjust_node_phandles() adds a constant to all the phandles + * of a given node. This is mainly used as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the phandles of the base device tree.
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_node_phandles(void *fdto, int node, + uint32_t delta) +{ + int child; + int ret; + + /* a node without the property is fine; any other error aborts */ + ret = overlay_phandle_add_offset(fdto, node, "phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + ret = overlay_phandle_add_offset(fdto, node, "linux,phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + /* apply the same shift to every subnode, recursively */ + fdt_for_each_subnode(child, fdto, node) { + ret = overlay_adjust_node_phandles(fdto, child, delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_adjust_local_phandles - Adjust the phandles of a whole overlay + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles of + * + * overlay_adjust_local_phandles() adds a constant to all the + * phandles of an overlay. This is mainly used as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the phandles of the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_local_phandles(void *fdto, uint32_t delta) +{ + /* + * Start adjusting the phandles from the overlay root + */ + return overlay_adjust_node_phandles(fdto, 0, delta); +} + +/** + * overlay_update_local_node_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @tree_node: Node offset of the node to operate on + * @fixup_node: Node offset of the matching local fixups node + * @delta: Offset to shift the phandles of + * + * overlay_update_local_node_references() updates the phandles + * pointing to a node within the device tree overlay by adding a + * constant delta. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlay's phandles to not conflict + * with the ones from the base device tree before merging them.
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_node_references(void *fdto, + int tree_node, + int fixup_node, + uint32_t delta) +{ + int fixup_prop; + int fixup_child; + int ret; + + fdt_for_each_property_offset(fixup_prop, fdto, fixup_node) { + const fdt32_t *fixup_val; + const char *tree_val; + const char *name; + int fixup_len; + int tree_len; + int i; + + fixup_val = fdt_getprop_by_offset(fdto, fixup_prop, + &name, &fixup_len); + if (!fixup_val) + return fixup_len; + + if (fixup_len % sizeof(uint32_t)) + return -FDT_ERR_BADOVERLAY; + + tree_val = fdt_getprop(fdto, tree_node, name, &tree_len); + if (!tree_val) { + if (tree_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + + return tree_len; + } + + for (i = 0; i < (fixup_len / sizeof(uint32_t)); i++) { + fdt32_t adj_val; + uint32_t poffset; + + poffset = fdt32_to_cpu(fixup_val[i]); + + /* + * phandles to fixup can be unaligned. + * + * Use a memcpy for the architectures that do + * not support unaligned accesses. 
+ */ + memcpy(&adj_val, tree_val + poffset, sizeof(adj_val)); + + adj_val = cpu_to_fdt32(fdt32_to_cpu(adj_val) + delta); + + ret = fdt_setprop_inplace_namelen_partial(fdto, + tree_node, + name, + strlen(name), + poffset, + &adj_val, + sizeof(adj_val)); + if (ret == -FDT_ERR_NOSPACE) + return -FDT_ERR_BADOVERLAY; + + if (ret) + return ret; + } + } + + fdt_for_each_subnode(fixup_child, fdto, fixup_node) { + const char *fixup_child_name = fdt_get_name(fdto, fixup_child, + NULL); + int tree_child; + + tree_child = fdt_subnode_offset(fdto, tree_node, + fixup_child_name); + if (tree_child == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (tree_child < 0) + return tree_child; + + ret = overlay_update_local_node_references(fdto, + tree_child, + fixup_child, + delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_update_local_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles of + * + * overlay_update_local_references() update all the phandles pointing + * to a node within the device tree overlay by adding a constant + * delta to not conflict with the base overlay. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlays phandles to not conflict + * with the ones from the base device tree before merging them. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_references(void *fdto, uint32_t delta) +{ + int fixups; + + fixups = fdt_path_offset(fdto, "/__local_fixups__"); + if (fixups < 0) { + /* There's no local phandles to adjust, bail out */ + if (fixups == -FDT_ERR_NOTFOUND) + return 0; + + return fixups; + } + + /* + * Update our local references from the root of the tree + */ + return overlay_update_local_node_references(fdto, 0, fixups, + delta); +} + +/** + * overlay_fixup_one_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @path: Path to a node holding a phandle in the overlay + * @path_len: number of path characters to consider + * @name: Name of the property holding the phandle reference in the overlay + * @name_len: number of name characters to consider + * @poffset: Offset within the overlay property where the phandle is stored + * @label: Label of the node referenced by the phandle + * + * overlay_fixup_one_phandle() resolves an overlay phandle pointing to + * a node in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_one_phandle(void *fdt, void *fdto, + int symbols_off, + const char *path, uint32_t path_len, + const char *name, uint32_t name_len, + int poffset, const char *label) +{ + const char *symbol_path; + uint32_t phandle; + fdt32_t phandle_prop; + int symbol_off, fixup_off; + int prop_len; + + if (symbols_off < 0) + return symbols_off; + + symbol_path = fdt_getprop(fdt, symbols_off, label, + &prop_len); + if (!symbol_path) + return prop_len; + + symbol_off = fdt_path_offset(fdt, symbol_path); + if (symbol_off < 0) + return symbol_off; + + phandle = fdt_get_phandle(fdt, symbol_off); + if (!phandle) + return -FDT_ERR_NOTFOUND; + + fixup_off = fdt_path_offset_namelen(fdto, path, path_len); + if (fixup_off == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (fixup_off < 0) + return fixup_off; + + phandle_prop = cpu_to_fdt32(phandle); + return fdt_setprop_inplace_namelen_partial(fdto, fixup_off, + name, name_len, poffset, + &phandle_prop, + sizeof(phandle_prop)); +}; + +/** + * overlay_fixup_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @property: Property offset in the overlay holding the list of fixups + * + * overlay_fixup_phandle() resolves all the overlay phandles pointed + * to in a __fixups__ property, and updates them to match the phandles + * in use in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_phandle(void *fdt, void *fdto, int symbols_off, + int property) +{ + const char *value; + const char *label; + int len; + + value = fdt_getprop_by_offset(fdto, property, + &label, &len); + if (!value) { + if (len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + + return len; + } + + do { + const char *path, *name, *fixup_end; + const char *fixup_str = value; + uint32_t path_len, name_len; + uint32_t fixup_len; + char *sep, *endptr; + int poffset, ret; + + fixup_end = memchr(value, '\0', len); + if (!fixup_end) + return -FDT_ERR_BADOVERLAY; + fixup_len = fixup_end - fixup_str; + + len -= fixup_len + 1; + value += fixup_len + 1; + + path = fixup_str; + sep = memchr(fixup_str, ':', fixup_len); + if (!sep || *sep != ':') + return -FDT_ERR_BADOVERLAY; + + path_len = sep - path; + if (path_len == (fixup_len - 1)) + return -FDT_ERR_BADOVERLAY; + + fixup_len -= path_len + 1; + name = sep + 1; + sep = memchr(name, ':', fixup_len); + if (!sep || *sep != ':') + return -FDT_ERR_BADOVERLAY; + + name_len = sep - name; + if (!name_len) + return -FDT_ERR_BADOVERLAY; + + poffset = strtoul(sep + 1, &endptr, 10); + if ((*endptr != '\0') || (endptr <= (sep + 1))) + return -FDT_ERR_BADOVERLAY; + + ret = overlay_fixup_one_phandle(fdt, fdto, symbols_off, + path, path_len, name, name_len, + poffset, label); + if (ret) + return ret; + } while (len > 0); + + return 0; +} + +/** + * overlay_fixup_phandles - Resolve the overlay phandles to the base + * device tree + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_fixup_phandles() resolves all the overlay phandles pointing + * to nodes in the base device tree. + * + * This is one of the steps of the device tree overlay application + * process, when you want all the phandles in the overlay to point to + * the actual base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_phandles(void *fdt, void *fdto) +{ + int fixups_off, symbols_off; + int property; + + /* We can have overlays without any fixups */ + fixups_off = fdt_path_offset(fdto, "/__fixups__"); + if (fixups_off == -FDT_ERR_NOTFOUND) + return 0; /* nothing to do */ + if (fixups_off < 0) + return fixups_off; + + /* And base DTs without symbols */ + symbols_off = fdt_path_offset(fdt, "/__symbols__"); + if ((symbols_off < 0 && (symbols_off != -FDT_ERR_NOTFOUND))) + return symbols_off; + + fdt_for_each_property_offset(property, fdto, fixups_off) { + int ret; + + ret = overlay_fixup_phandle(fdt, fdto, symbols_off, property); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_apply_node - Merges a node into the base device tree + * @fdt: Base Device Tree blob + * @target: Node offset in the base device tree to apply the fragment to + * @fdto: Device tree overlay blob + * @node: Node offset in the overlay holding the changes to merge + * + * overlay_apply_node() merges a node into a target base device tree + * node pointed. + * + * This is part of the final step in the device tree overlay + * application process, when all the phandles have been adjusted and + * resolved and you just have to merge overlay into the base device + * tree. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_apply_node(void *fdt, int target, + void *fdto, int node) +{ + int property; + int subnode; + + fdt_for_each_property_offset(property, fdto, node) { + const char *name; + const void *prop; + int prop_len; + int ret; + + prop = fdt_getprop_by_offset(fdto, property, &name, + &prop_len); + if (prop_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + if (prop_len < 0) + return prop_len; + + ret = fdt_setprop(fdt, target, name, prop, prop_len); + if (ret) + return ret; + } + + fdt_for_each_subnode(subnode, fdto, node) { + const char *name = fdt_get_name(fdto, subnode, NULL); + int nnode; + int ret; + + nnode = fdt_add_subnode(fdt, target, name); + if (nnode == -FDT_ERR_EXISTS) { + nnode = fdt_subnode_offset(fdt, target, name); + if (nnode == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + } + + if (nnode < 0) + return nnode; + + ret = overlay_apply_node(fdt, nnode, fdto, subnode); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_merge - Merge an overlay into its base device tree + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_merge() merges an overlay into its base device tree. + * + * This is the next to last step in the device tree overlay application + * process, when all the phandles have been adjusted and resolved and + * you just have to merge overlay into the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_merge(void *fdt, void *fdto) +{ + int fragment; + + fdt_for_each_subnode(fragment, fdto, 0) { + int overlay; + int target; + int ret; + + /* + * Each fragments will have an __overlay__ node. 
If + * they don't, it's not supposed to be merged + */ + overlay = fdt_subnode_offset(fdto, fragment, "__overlay__"); + if (overlay == -FDT_ERR_NOTFOUND) + continue; + + if (overlay < 0) + return overlay; + + target = overlay_get_target(fdt, fdto, fragment, NULL); + if (target < 0) + return target; + + ret = overlay_apply_node(fdt, target, fdto, overlay); + if (ret) + return ret; + } + + return 0; +} + +static int get_path_len(const void *fdt, int nodeoffset) +{ + int len = 0, namelen; + const char *name; + + FDT_RO_PROBE(fdt); + + for (;;) { + name = fdt_get_name(fdt, nodeoffset, &namelen); + if (!name) + return namelen; + + /* root? we're done */ + if (namelen == 0) + break; + + nodeoffset = fdt_parent_offset(fdt, nodeoffset); + if (nodeoffset < 0) + return nodeoffset; + len += namelen + 1; + } + + /* in case of root pretend it's "/" */ + if (len == 0) + len++; + return len; +} + +/** + * overlay_symbol_update - Update the symbols of base tree after a merge + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_symbol_update() updates the symbols of the base tree with the + * symbols of the applied overlay + * + * This is the last step in the device tree overlay application + * process, allowing the reference of overlay symbols by subsequent + * overlay operations. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_symbol_update(void *fdt, void *fdto) +{ + int root_sym, ov_sym, prop, path_len, fragment, target; + int len, frag_name_len, ret, rel_path_len; + const char *s, *e; + const char *path; + const char *name; + const char *frag_name; + const char *rel_path; + const char *target_path; + char *buf; + void *p; + + ov_sym = fdt_subnode_offset(fdto, 0, "__symbols__"); + + /* if no overlay symbols exist no problem */ + if (ov_sym < 0) + return 0; + + root_sym = fdt_subnode_offset(fdt, 0, "__symbols__"); + + /* it no root symbols exist we should create them */ + if (root_sym == -FDT_ERR_NOTFOUND) + root_sym = fdt_add_subnode(fdt, 0, "__symbols__"); + + /* any error is fatal now */ + if (root_sym < 0) + return root_sym; + + /* iterate over each overlay symbol */ + fdt_for_each_property_offset(prop, fdto, ov_sym) { + path = fdt_getprop_by_offset(fdto, prop, &name, &path_len); + if (!path) + return path_len; + + /* verify it's a string property (terminated by a single \0) */ + if (path_len < 1 || memchr(path, '\0', path_len) != &path[path_len - 1]) + return -FDT_ERR_BADVALUE; + + /* keep end marker to avoid strlen() */ + e = path + path_len; + + if (*path != '/') + return -FDT_ERR_BADVALUE; + + /* get fragment name first */ + s = strchr(path + 1, '/'); + if (!s) { + /* Symbol refers to something that won't end + * up in the target tree */ + continue; + } + + frag_name = path + 1; + frag_name_len = s - path - 1; + + /* verify format; safe since "s" lies in \0 terminated prop */ + len = sizeof("/__overlay__/") - 1; + if ((e - s) > len && (memcmp(s, "/__overlay__/", len) == 0)) { + /* //__overlay__/ */ + rel_path = s + len; + rel_path_len = e - rel_path; + } else if ((e - s) == len + && (memcmp(s, "/__overlay__", len - 1) == 0)) { + /* //__overlay__ */ + rel_path = ""; + rel_path_len = 0; + } else { + /* Symbol refers to something that won't end + * up in the target tree */ + continue; + } 
+ + /* find the fragment index in which the symbol lies */ + ret = fdt_subnode_offset_namelen(fdto, 0, frag_name, + frag_name_len); + /* not found? */ + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + fragment = ret; + + /* an __overlay__ subnode must exist */ + ret = fdt_subnode_offset(fdto, fragment, "__overlay__"); + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + + /* get the target of the fragment */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + + /* if we have a target path use */ + if (!target_path) { + ret = get_path_len(fdt, target); + if (ret < 0) + return ret; + len = ret; + } else { + len = strlen(target_path); + } + + ret = fdt_setprop_placeholder(fdt, root_sym, name, + len + (len > 1) + rel_path_len + 1, &p); + if (ret < 0) + return ret; + + if (!target_path) { + /* again in case setprop_placeholder changed it */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + } + + buf = p; + if (len > 1) { /* target is not root */ + if (!target_path) { + ret = fdt_get_path(fdt, target, buf, len + 1); + if (ret < 0) + return ret; + } else + memcpy(buf, target_path, len + 1); + + } else + len--; + + buf[len] = '/'; + memcpy(buf + len + 1, rel_path, rel_path_len); + buf[len + 1 + rel_path_len] = '\0'; + } + + return 0; +} + +int fdt_overlay_apply(void *fdt, void *fdto) +{ + uint32_t delta; + int ret; + + FDT_RO_PROBE(fdt); + FDT_RO_PROBE(fdto); + + ret = fdt_find_max_phandle(fdt, &delta); + if (ret) + goto err; + + ret = overlay_adjust_local_phandles(fdto, delta); + if (ret) + goto err; + + ret = overlay_update_local_references(fdto, delta); + if (ret) + goto err; + + ret = overlay_fixup_phandles(fdt, fdto); + if (ret) + goto err; + + ret = overlay_merge(fdt, fdto); + if (ret) + goto err; + + ret = overlay_symbol_update(fdt, fdto); + if (ret) + goto err; + + /* + * The overlay has been damaged, erase its magic. 
+ */ + fdt_set_magic(fdto, ~0); + + return 0; + +err: + /* + * The overlay might have been damaged, erase its magic. + */ + fdt_set_magic(fdto, ~0); + + /* + * The base device tree might have been damaged, erase its + * magic. + */ + fdt_set_magic(fdt, ~0); + + return ret; +} diff --git a/fdt/fdt_ro.c b/fdt/fdt_ro.c new file mode 100644 index 0000000000..a5c2797cde --- /dev/null +++ b/fdt/fdt_ro.c @@ -0,0 +1,898 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_nodename_eq_(const void *fdt, int offset, + const char *s, int len) +{ + int olen; + const char *p = fdt_get_name(fdt, offset, &olen); + + if (!p || olen < len) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) +{ + int32_t totalsize = fdt_ro_probe_(fdt); + uint32_t absoffset = stroffset + fdt_off_dt_strings(fdt); + size_t len; + int err; + const char *s, *n; + + err = totalsize; + if (totalsize < 0) + goto fail; + + err = -FDT_ERR_BADOFFSET; + if (absoffset >= totalsize) + goto fail; + len = totalsize - absoffset; + + if (fdt_magic(fdt) == FDT_MAGIC) { + if (stroffset < 0) + goto fail; + if (fdt_version(fdt) >= 17) { + if (stroffset >= fdt_size_dt_strings(fdt)) + goto fail; + if ((fdt_size_dt_strings(fdt) - stroffset) < len) + len = fdt_size_dt_strings(fdt) - stroffset; + } + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + if ((stroffset >= 0) + || (stroffset < -fdt_size_dt_strings(fdt))) + goto fail; + if ((-stroffset) < len) + len = -stroffset; + } else { + err = -FDT_ERR_INTERNAL; + goto fail; + } + + s = (const char *)fdt + absoffset; + n = memchr(s, '\0', len); + if (!n) 
{ + /* missing terminating NULL */ + err = -FDT_ERR_TRUNCATED; + goto fail; + } + + if (lenp) + *lenp = n - s; + return s; + +fail: + if (lenp) + *lenp = err; + return NULL; +} + +const char *fdt_string(const void *fdt, int stroffset) +{ + return fdt_get_string(fdt, stroffset, NULL); +} + +static int fdt_string_eq_(const void *fdt, int stroffset, + const char *s, int len) +{ + int slen; + const char *p = fdt_get_string(fdt, stroffset, &slen); + + return p && (slen == len) && (memcmp(p, s, len) == 0); +} + +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max = 0; + int offset = -1; + + while (true) { + uint32_t value; + + offset = fdt_next_node(fdt, offset, NULL); + if (offset < 0) { + if (offset == -FDT_ERR_NOTFOUND) + break; + + return offset; + } + + value = fdt_get_phandle(fdt, offset); + + if (value > max) + max = value; + } + + if (phandle) + *phandle = max; + + return 0; +} + +int fdt_generate_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max; + int err; + + err = fdt_find_max_phandle(fdt, &max); + if (err < 0) + return err; + + if (max == FDT_MAX_PHANDLE) + return -FDT_ERR_NOPHANDLES; + + if (phandle) + *phandle = max + 1; + + return 0; +} + +static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n) +{ + int offset = n * sizeof(struct fdt_reserve_entry); + int absoffset = fdt_off_mem_rsvmap(fdt) + offset; + + if (absoffset < fdt_off_mem_rsvmap(fdt)) + return NULL; + if (absoffset > fdt_totalsize(fdt) - sizeof(struct fdt_reserve_entry)) + return NULL; + return fdt_mem_rsv_(fdt, n); +} + +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + const struct fdt_reserve_entry *re; + + FDT_RO_PROBE(fdt); + re = fdt_mem_rsv(fdt, n); + if (!re) + return -FDT_ERR_BADOFFSET; + + *address = fdt64_ld(&re->address); + *size = fdt64_ld(&re->size); + return 0; +} + +int fdt_num_mem_rsv(const void *fdt) +{ + int i; + const struct fdt_reserve_entry *re; + + for (i = 0; (re = fdt_mem_rsv(fdt, 
i)) != NULL; i++) { + if (fdt64_ld(&re->size) == 0) + return i; + } + return -FDT_ERR_TRUNCATED; +} + +static int nextprop_(const void *fdt, int offset) +{ + uint32_t tag; + int nextoffset; + + do { + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + if (nextoffset >= 0) + return -FDT_ERR_BADSTRUCTURE; + else + return nextoffset; + + case FDT_PROP: + return offset; + } + offset = nextoffset; + } while (tag == FDT_NOP); + + return -FDT_ERR_NOTFOUND; +} + +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_RO_PROBE(fdt); + + for (depth = 0; + (offset >= 0) && (depth >= 0); + offset = fdt_next_node(fdt, offset, &depth)) + if ((depth == 1) + && fdt_nodename_eq_(fdt, offset, name, namelen)) + return offset; + + if (depth < 0) + return -FDT_ERR_NOTFOUND; + return offset; /* error */ +} + +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ + return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) +{ + const char *end = path + namelen; + const char *p = path; + int offset = 0; + + FDT_RO_PROBE(fdt); + + /* see if we have an alias */ + if (*path != '/') { + const char *q = memchr(path, '/', end - p); + + if (!q) + q = end; + + p = fdt_get_alias_namelen(fdt, p, q - p); + if (!p) + return -FDT_ERR_BADPATH; + offset = fdt_path_offset(fdt, p); + + p = q; + } + + while (p < end) { + const char *q; + + while (*p == '/') { + p++; + if (p == end) + return offset; + } + q = memchr(p, '/', end - p); + if (! 
q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + +int fdt_path_offset(const void *fdt, const char *path) +{ + return fdt_path_offset_namelen(fdt, path, strlen(path)); +} + +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset); + const char *nameptr; + int err; + + if (((err = fdt_ro_probe_(fdt)) < 0) + || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0)) + goto fail; + + nameptr = nh->name; + + if (fdt_version(fdt) < 0x10) { + /* + * For old FDT versions, match the naming conventions of V16: + * give only the leaf name (after all /). The actual tree + * contents are loosely checked. + */ + const char *leaf; + leaf = strrchr(nameptr, '/'); + if (leaf == NULL) { + err = -FDT_ERR_BADSTRUCTURE; + goto fail; + } + nameptr = leaf+1; + } + + if (len) + *len = strlen(nameptr); + + return nameptr; + + fail: + if (len) + *len = err; + return NULL; +} + +int fdt_first_property_offset(const void *fdt, int nodeoffset) +{ + int offset; + + if ((offset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +int fdt_next_property_offset(const void *fdt, int offset) +{ + if ((offset = fdt_check_prop_offset_(fdt, offset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, + int offset, + int *lenp) +{ + int err; + const struct fdt_property *prop; + + if ((err = fdt_check_prop_offset_(fdt, offset)) < 0) { + if (lenp) + *lenp = err; + return NULL; + } + + prop = fdt_offset_ptr_(fdt, offset); + + if (lenp) + *lenp = fdt32_ld(&prop->len); + + return prop; +} + +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. 
fdt_getprop_*() will, however. */ + + if (fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_by_offset_(fdt, offset, lenp); +} + +static const struct fdt_property *fdt_get_property_namelen_(const void *fdt, + int offset, + const char *name, + int namelen, + int *lenp, + int *poffset) +{ + for (offset = fdt_first_property_offset(fdt, offset); + (offset >= 0); + (offset = fdt_next_property_offset(fdt, offset))) { + const struct fdt_property *prop; + + if (!(prop = fdt_get_property_by_offset_(fdt, offset, lenp))) { + offset = -FDT_ERR_INTERNAL; + break; + } + if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff), + name, namelen)) { + if (poffset) + *poffset = offset; + return prop; + } + } + + if (lenp) + *lenp = offset; + return NULL; +} + + +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int offset, + const char *name, + int namelen, int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. fdt_getprop_*() will, however. 
*/ + if (fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_namelen_(fdt, offset, name, namelen, lenp, + NULL); +} + + +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + return fdt_get_property_namelen(fdt, nodeoffset, name, + strlen(name), lenp); +} + +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp) +{ + int poffset; + const struct fdt_property *prop; + + prop = fdt_get_property_namelen_(fdt, nodeoffset, name, namelen, lenp, + &poffset); + if (!prop) + return NULL; + + /* Handle realignment */ + if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 && + fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset_(fdt, offset, lenp); + if (!prop) + return NULL; + if (namep) { + const char *name; + int namelen; + name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff), + &namelen); + if (!name) { + if (lenp) + *lenp = namelen; + return NULL; + } + *namep = name; + } + + /* Handle realignment */ + if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 && + fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp); +} + +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const fdt32_t *php; + int len; + + /* FIXME: This is a bit sub-optimal, since we potentially scan + * over all the properties twice. 
*/ + php = fdt_getprop(fdt, nodeoffset, "phandle", &len); + if (!php || (len != sizeof(*php))) { + php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len); + if (!php || (len != sizeof(*php))) + return 0; + } + + return fdt32_ld(php); +} + +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen) +{ + int aliasoffset; + + aliasoffset = fdt_path_offset(fdt, "/aliases"); + if (aliasoffset < 0) + return NULL; + + return fdt_getprop_namelen(fdt, aliasoffset, name, namelen, NULL); +} + +const char *fdt_get_alias(const void *fdt, const char *name) +{ + return fdt_get_alias_namelen(fdt, name, strlen(name)); +} + +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen) +{ + int pdepth = 0, p = 0; + int offset, depth, namelen; + const char *name; + + FDT_RO_PROBE(fdt); + + if (buflen < 2) + return -FDT_ERR_NOSPACE; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + while (pdepth > depth) { + do { + p--; + } while (buf[p-1] != '/'); + pdepth--; + } + + if (pdepth >= depth) { + name = fdt_get_name(fdt, offset, &namelen); + if (!name) + return namelen; + if ((p + namelen + 1) <= buflen) { + memcpy(buf + p, name, namelen); + p += namelen; + buf[p++] = '/'; + pdepth++; + } + } + + if (offset == nodeoffset) { + if (pdepth < (depth + 1)) + return -FDT_ERR_NOSPACE; + + if (p > 1) /* special case so that root path is "/", not "" */ + p--; + buf[p] = '\0'; + return 0; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth) +{ + int offset, depth; + int supernodeoffset = -FDT_ERR_INTERNAL; + + FDT_RO_PROBE(fdt); + + if (supernodedepth < 0) + return -FDT_ERR_NOTFOUND; + + for (offset = 0, depth = 0; 
+ (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (err < 0) ? err : -FDT_ERR_INTERNAL; + return nodedepth; +} + +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + val = fdt_getprop(fdt, offset, propname, &len); + if (val && (len == proplen) + && (memcmp(val, propval, len) == 0)) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle) +{ + int offset; + + if ((phandle == 0) || (phandle == -1)) + return -FDT_ERR_BADPHANDLE; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we + * potentially scan each property of a node in + * fdt_get_phandle(), then if that didn't find what + * we want, we scan over them again making our way to the next + * node. Still it's the easiest to implement approach; + * performance can come later. */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + if (fdt_get_phandle(fdt, offset) == phandle) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str) +{ + int len = strlen(str); + const char *p; + + while (listlen >= len) { + if (memcmp(str, strlist, len+1) == 0) + return 1; + p = memchr(strlist, '\0', listlen); + if (!p) + return 0; /* malformed strlist.. */ + listlen -= (p-strlist) + 1; + strlist = p + 1; + } + return 0; +} + +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property) +{ + const char *list, *end; + int length, count = 0; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return length; + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
*/ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + list += length; + count++; + } + + return count; +} + +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string) +{ + int length, len, idx = 0; + const char *list, *end; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return length; + + len = strlen(string) + 1; + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. */ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + if (length == len && memcmp(list, string, length) == 0) + return idx; + + list += length; + idx++; + } + + return -FDT_ERR_NOTFOUND; +} + +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int idx, + int *lenp) +{ + const char *list, *end; + int length; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) { + if (lenp) + *lenp = length; + + return NULL; + } + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
*/ + if (list + length > end) { + if (lenp) + *lenp = -FDT_ERR_BADVALUE; + + return NULL; + } + + if (idx == 0) { + if (lenp) + *lenp = length - 1; + + return list; + } + + list += length; + idx--; + } + + if (lenp) + *lenp = -FDT_ERR_NOTFOUND; + + return NULL; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + + return !fdt_stringlist_contains(prop, len, compatible); +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. */ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_check_full(const void *fdt, size_t bufsize) +{ + int err; + int num_memrsv; + int offset, nextoffset = 0; + uint32_t tag; + unsigned depth = 0; + const void *prop; + const char *propname; + + if (bufsize < FDT_V1_SIZE) + return -FDT_ERR_TRUNCATED; + err = fdt_check_header(fdt); + if (err != 0) + return err; + if (bufsize < fdt_totalsize(fdt)) + return -FDT_ERR_TRUNCATED; + + num_memrsv = fdt_num_mem_rsv(fdt); + if (num_memrsv < 0) + return num_memrsv; + + while (1) { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + if (nextoffset < 0) + return nextoffset; + + switch (tag) { + case FDT_NOP: + break; + + case FDT_END: + if (depth 
!= 0) + return -FDT_ERR_BADSTRUCTURE; + return 0; + + case FDT_BEGIN_NODE: + depth++; + if (depth > INT_MAX) + return -FDT_ERR_BADSTRUCTURE; + break; + + case FDT_END_NODE: + if (depth == 0) + return -FDT_ERR_BADSTRUCTURE; + depth--; + break; + + case FDT_PROP: + prop = fdt_getprop_by_offset(fdt, offset, &propname, + &err); + if (!prop) + return err; + break; + + default: + return -FDT_ERR_INTERNAL; + } + } +} diff --git a/fdt/fdt_rw.c b/fdt/fdt_rw.c new file mode 100644 index 0000000000..8795947c00 --- /dev/null +++ b/fdt/fdt_rw.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_blocks_misordered_(const void *fdt, + int mem_rsv_size, int struct_size) +{ + return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8)) + || (fdt_off_dt_struct(fdt) < + (fdt_off_mem_rsvmap(fdt) + mem_rsv_size)) + || (fdt_off_dt_strings(fdt) < + (fdt_off_dt_struct(fdt) + struct_size)) + || (fdt_totalsize(fdt) < + (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt))); +} + +static int fdt_rw_probe_(void *fdt) +{ + FDT_RO_PROBE(fdt); + + if (fdt_version(fdt) < 17) + return -FDT_ERR_BADVERSION; + if (fdt_blocks_misordered_(fdt, sizeof(struct fdt_reserve_entry), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_BADLAYOUT; + if (fdt_version(fdt) > 17) + fdt_set_version(fdt, 17); + + return 0; +} + +#define FDT_RW_PROBE(fdt) \ + { \ + int err_; \ + if ((err_ = fdt_rw_probe_(fdt)) != 0) \ + return err_; \ + } + +static inline int fdt_data_size_(void *fdt) +{ + return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); +} + +static int fdt_splice_(void *fdt, void *splicepoint, int oldlen, int newlen) +{ + char *p = splicepoint; + char *end = (char *)fdt + fdt_data_size_(fdt); + + if (((p + oldlen) < p) || ((p + oldlen) > end)) + return 
-FDT_ERR_BADOFFSET; + if ((p < (char *)fdt) || ((end - oldlen + newlen) < (char *)fdt)) + return -FDT_ERR_BADOFFSET; + if ((end - oldlen + newlen) > ((char *)fdt + fdt_totalsize(fdt))) + return -FDT_ERR_NOSPACE; + memmove(p + newlen, p + oldlen, end - p - oldlen); + return 0; +} + +static int fdt_splice_mem_rsv_(void *fdt, struct fdt_reserve_entry *p, + int oldn, int newn) +{ + int delta = (newn - oldn) * sizeof(*p); + int err; + err = fdt_splice_(fdt, p, oldn * sizeof(*p), newn * sizeof(*p)); + if (err) + return err; + fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int fdt_splice_struct_(void *fdt, void *p, + int oldlen, int newlen) +{ + int delta = newlen - oldlen; + int err; + + if ((err = fdt_splice_(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int newlen = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) - newlen); +} + +static int fdt_splice_string_(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = fdt_splice_(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + *allocated = 0; + + p = fdt_find_string_(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = fdt_splice_string_(fdt, len); + if (err) + return err; + + *allocated = 1; + + memcpy(new, 
s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_PROBE(fdt); + + re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt)); + err = fdt_splice_mem_rsv_(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n); + + FDT_RW_PROBE(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + return fdt_splice_mem_rsv_(fdt, re, 1, 0); +} + +static int fdt_resize_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (!*prop) + return oldlen; + + if ((err = fdt_splice_struct_(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int fdt_add_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + int allocated; + + if ((nextoffset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = fdt_find_add_string_(fdt, name, &allocated); + if (namestroff < 0) + return namestroff; + + *prop = fdt_offset_ptr_w_(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = fdt_splice_struct_(fdt, *prop, 0, proplen); + if (err) { + if (allocated) + fdt_del_last_string_(fdt, name); + return err; + } + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_PROBE(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, 
&oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = fdt_splice_struct_(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data) +{ + struct fdt_property *prop; + int err; + + FDT_RW_PROBE(fdt); + + err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + *prop_data = prop->data; + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + void *prop_data; + int err; + + err = fdt_setprop_placeholder(fdt, nodeoffset, name, len, &prop_data); + if (err) + return err; + + if (len) + memcpy(prop_data, val, len); + return 0; +} + +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err, oldlen, newlen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (prop) { + newlen = len + oldlen; + err = fdt_splice_struct_(fdt, prop->data, + FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(newlen)); + if (err) + return err; + prop->len = cpu_to_fdt32(newlen); + memcpy(prop->data + oldlen, val, len); + } else { + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + memcpy(prop->data, val, len); + } + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return fdt_splice_struct_(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; 
+ int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + fdt32_t *endtag; + + FDT_RW_PROBE(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = fdt_offset_ptr_w_(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = fdt_splice_struct_(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) +{ + int endoffset; + + FDT_RW_PROBE(fdt); + + endoffset = fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return fdt_splice_struct_(fdt, fdt_offset_ptr_w_(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void fdt_packblocks_(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + 
memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_RO_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + if (struct_size < 0) + return struct_size; + } + + if (!fdt_blocks_misordered_(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if (bufsize < newsize) + return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer */ + tmp = buf; + /* But if that overlaps with the old tree... 
*/ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, fdt_data_size_(fdt)); + + return 0; +} diff --git a/fdt/fdt_strerror.c b/fdt/fdt_strerror.c new file mode 100644 index 0000000000..768db66ead --- /dev/null +++ b/fdt/fdt_strerror.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADPHANDLE), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), + FDT_ERRTABENT(FDT_ERR_INTERNAL), + FDT_ERRTABENT(FDT_ERR_BADNCELLS), + FDT_ERRTABENT(FDT_ERR_BADVALUE), + FDT_ERRTABENT(FDT_ERR_BADOVERLAY), + FDT_ERRTABENT(FDT_ERR_NOPHANDLES), + FDT_ERRTABENT(FDT_ERR_BADFLAGS), +}; +#define FDT_ERRTABSIZE (sizeof(fdt_errtable) / sizeof(fdt_errtable[0])) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return ""; + else if (errval == 0) + return ""; + else if (errval > -FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return ""; +} diff --git a/fdt/fdt_sw.c b/fdt/fdt_sw.c new file mode 100644 index 0000000000..76bea22f73 --- /dev/null +++ b/fdt/fdt_sw.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +static int fdt_sw_probe_(void *fdt) +{ + if (fdt_magic(fdt) == FDT_MAGIC) + return -FDT_ERR_BADSTATE; + else if (fdt_magic(fdt) != FDT_SW_MAGIC) + return -FDT_ERR_BADMAGIC; + return 0; +} + +#define FDT_SW_PROBE(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_(fdt)) != 0) \ + return err; \ + } + +/* 'memrsv' state: Initial state after fdt_create() + * + * Allowed functions: + * fdt_add_reservmap_entry() + * fdt_finish_reservemap() [moves to 'struct' state] + */ +static int fdt_sw_probe_memrsv_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (fdt_off_dt_strings(fdt) != 0) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_MEMRSV(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \ + return err; \ + } + +/* 'struct' state: Enter this state after fdt_finish_reservemap() + * + * Allowed functions: + * fdt_begin_node() + * fdt_end_node() + * fdt_property*() + * fdt_finish() [moves to 'complete' state] + */ +static int fdt_sw_probe_struct_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (fdt_off_dt_strings(fdt) != fdt_totalsize(fdt)) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_STRUCT(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_struct_(fdt)) != 0) \ + return err; \ + } + +static inline uint32_t sw_flags(void *fdt) +{ + /* assert: (fdt_magic(fdt) == FDT_SW_MAGIC) */ + return fdt_last_comp_version(fdt); +} + +/* 'complete' state: Enter this state after fdt_finish() + * + * Allowed functions: none + */ + +static void *fdt_grab_space_(void *fdt, size_t len) +{ + int offset = fdt_size_dt_struct(fdt); + int spaceleft; + + spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt) + - fdt_size_dt_strings(fdt); + + if ((offset + len < offset) || (offset + len > spaceleft)) + return NULL; + + fdt_set_size_dt_struct(fdt, offset + len); + return fdt_offset_ptr_w_(fdt, 
offset); +} + +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags) +{ + const size_t hdrsize = FDT_ALIGN(sizeof(struct fdt_header), + sizeof(struct fdt_reserve_entry)); + void *fdt = buf; + + if (bufsize < hdrsize) + return -FDT_ERR_NOSPACE; + + if (flags & ~FDT_CREATE_FLAGS_ALL) + return -FDT_ERR_BADFLAGS; + + memset(buf, 0, bufsize); + + /* + * magic and last_comp_version keep intermediate state during the fdt + * creation process, which is replaced with the proper FDT format by + * fdt_finish(). + * + * flags should be accessed with sw_flags(). + */ + fdt_set_magic(fdt, FDT_SW_MAGIC); + fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION); + fdt_set_last_comp_version(fdt, flags); + + fdt_set_totalsize(fdt, bufsize); + + fdt_set_off_mem_rsvmap(fdt, hdrsize); + fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt)); + fdt_set_off_dt_strings(fdt, 0); + + return 0; +} + +int fdt_create(void *buf, int bufsize) +{ + return fdt_create_with_flags(buf, bufsize, 0); +} + +int fdt_resize(void *fdt, void *buf, int bufsize) +{ + size_t headsize, tailsize; + char *oldtail, *newtail; + + FDT_SW_PROBE(fdt); + + headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + tailsize = fdt_size_dt_strings(fdt); + + if ((headsize + tailsize) > fdt_totalsize(fdt)) + return -FDT_ERR_INTERNAL; + + if ((headsize + tailsize) > bufsize) + return -FDT_ERR_NOSPACE; + + oldtail = (char *)fdt + fdt_totalsize(fdt) - tailsize; + newtail = (char *)buf + bufsize - tailsize; + + /* Two cases to avoid clobbering data if the old and new + * buffers partially overlap */ + if (buf <= fdt) { + memmove(buf, fdt, headsize); + memmove(newtail, oldtail, tailsize); + } else { + memmove(newtail, oldtail, tailsize); + memmove(buf, fdt, headsize); + } + + fdt_set_totalsize(buf, bufsize); + if (fdt_off_dt_strings(buf)) + fdt_set_off_dt_strings(buf, bufsize); + + return 0; +} + +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size) +{ + struct fdt_reserve_entry *re; + int offset; + + 
FDT_SW_PROBE_MEMRSV(fdt); + + offset = fdt_off_dt_struct(fdt); + if ((offset + sizeof(*re)) > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + + re = (struct fdt_reserve_entry *)((char *)fdt + offset); + re->address = cpu_to_fdt64(addr); + re->size = cpu_to_fdt64(size); + + fdt_set_off_dt_struct(fdt, offset + sizeof(*re)); + + return 0; +} + +int fdt_finish_reservemap(void *fdt) +{ + int err = fdt_add_reservemap_entry(fdt, 0, 0); + + if (err) + return err; + + fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt)); + return 0; +} + +int fdt_begin_node(void *fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen; + + FDT_SW_PROBE_STRUCT(fdt); + + namelen = strlen(name) + 1; + nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + fdt32_t *en; + + FDT_SW_PROBE_STRUCT(fdt); + + en = fdt_grab_space_(fdt, FDT_TAGSIZE); + if (! 
en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int fdt_add_string_(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + int struct_top, offset; + + offset = -strtabsize - len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) + offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab + offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return offset; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, strtabsize - len); +} + +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + int strtabsize = fdt_size_dt_strings(fdt); + const char *p; + + *allocated = 0; + + p = fdt_find_string_(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + *allocated = 1; + + return fdt_add_string_(fdt, s); +} + +int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp) +{ + struct fdt_property *prop; + int nameoff; + int allocated; + + FDT_SW_PROBE_STRUCT(fdt); + + /* String de-duplication can be slow, _NO_NAME_DEDUP skips it */ + if (sw_flags(fdt) & FDT_CREATE_FLAG_NO_NAME_DEDUP) { + allocated = 1; + nameoff = fdt_add_string_(fdt, name); + } else { + nameoff = fdt_find_add_string_(fdt, name, &allocated); + } + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = fdt_grab_space_(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! 
prop) { + if (allocated) + fdt_del_last_string_(fdt, name); + return -FDT_ERR_NOSPACE; + } + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + *valp = prop->data; + return 0; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + void *ptr; + int ret; + + ret = fdt_property_placeholder(fdt, name, len, &ptr); + if (ret) + return ret; + memcpy(ptr, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + fdt32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_PROBE_STRUCT(fdt); + + /* Add terminator */ + end = fdt_grab_space_(fdt, sizeof(*end)); + if (! end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + fdt_offset_ptr_w_(fdt, offset); + int nameoff; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + if (nextoffset < 0) + return nextoffset; + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + + /* And fix up fields that were keeping intermediate state. 
*/ + fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION); + fdt_set_magic(fdt, FDT_MAGIC); + + return 0; +} diff --git a/fdt/fdt_wip.c b/fdt/fdt_wip.c new file mode 100644 index 0000000000..f64139e0b3 --- /dev/null +++ b/fdt/fdt_wip.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include +#include + +#include "libfdt_internal.h" + +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen, + &proplen); + if (!propval) + return proplen; + + if (proplen < (len + idx)) + return -FDT_ERR_NOSPACE; + + memcpy((char *)propval + idx, val, len); + return 0; +} + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + const void *propval; + int proplen; + + propval = fdt_getprop(fdt, nodeoffset, name, &proplen); + if (!propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name, + strlen(name), 0, + val, len); +} + +static void fdt_nop_region_(void *start, int len) +{ + fdt32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + fdt_nop_region_(prop, len + sizeof(*prop)); + + return 0; +} + +int fdt_node_end_offset_(void *fdt, int offset) +{ + int depth = 0; + + while ((offset >= 0) && (depth >= 0)) + offset = fdt_next_node(fdt, offset, &depth); + + return offset; +} + +int fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + 
endoffset = fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + fdt_nop_region_(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/fdt/libfdt.h b/fdt/libfdt.h new file mode 100644 index 0000000000..d2356cce43 --- /dev/null +++ b/fdt/libfdt.h @@ -0,0 +1,2077 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_H +#define LIBFDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ + +#include +#include + +#define FDT_FIRST_SUPPORTED_VERSION 0x02 +#define FDT_LAST_SUPPORTED_VERSION 0x11 + +/* Error codes: informative error codes */ +#define FDT_ERR_NOTFOUND 1 + /* FDT_ERR_NOTFOUND: The requested node or property does not exist */ +#define FDT_ERR_EXISTS 2 + /* FDT_ERR_EXISTS: Attempted to create a node or property which + * already exists */ +#define FDT_ERR_NOSPACE 3 + /* FDT_ERR_NOSPACE: Operation needed to expand the device + * tree, but its buffer did not have sufficient space to + * contain the expanded tree. Use fdt_open_into() to move the + * device tree to a buffer with more space. */ + +/* Error codes: codes for bad parameters */ +#define FDT_ERR_BADOFFSET 4 + /* FDT_ERR_BADOFFSET: Function was passed a structure block + * offset which is out-of-bounds, or which points to an + * unsuitable part of the structure for the operation. */ +#define FDT_ERR_BADPATH 5 + /* FDT_ERR_BADPATH: Function was passed a badly formatted path + * (e.g. missing a leading / for a function which requires an + * absolute path) */ +#define FDT_ERR_BADPHANDLE 6 + /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle. + * This can be caused either by an invalid phandle property + * length, or the phandle value was either 0 or -1, which are + * not permitted. 
*/ +#define FDT_ERR_BADSTATE 7 + /* FDT_ERR_BADSTATE: Function was passed an incomplete device + * tree created by the sequential-write functions, which is + * not sufficiently complete for the requested operation. */ + +/* Error codes: codes for bad device tree blobs */ +#define FDT_ERR_TRUNCATED 8 + /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly + * terminated (overflows, goes outside allowed bounds, or + * isn't properly terminated). */ +#define FDT_ERR_BADMAGIC 9 + /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a + * device tree at all - it is missing the flattened device + * tree magic number. */ +#define FDT_ERR_BADVERSION 10 + /* FDT_ERR_BADVERSION: Given device tree has a version which + * can't be handled by the requested operation. For + * read-write functions, this may mean that fdt_open_into() is + * required to convert the tree to the expected version. */ +#define FDT_ERR_BADSTRUCTURE 11 + /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt + * structure block or other serious error (e.g. misnested + * nodes, or subnodes preceding properties). */ +#define FDT_ERR_BADLAYOUT 12 + /* FDT_ERR_BADLAYOUT: For read-write functions, the given + * device tree has its sub-blocks in an order that the + * function can't handle (memory reserve map, then structure, + * then strings). Use fdt_open_into() to reorganize the tree + * into a form suitable for the read-write operations. */ + +/* "Can't happen" error indicating a bug in libfdt */ +#define FDT_ERR_INTERNAL 13 + /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion. + * Should never be returned, if it is, it indicates a bug in + * libfdt itself. */ + +/* Errors in device tree content */ +#define FDT_ERR_BADNCELLS 14 + /* FDT_ERR_BADNCELLS: Device tree has a #address-cells, #size-cells + * or similar property with a bad format or value */ + +#define FDT_ERR_BADVALUE 15 + /* FDT_ERR_BADVALUE: Device tree has a property with an unexpected + * value.
For example: a property expected to contain a string list + * is not NUL-terminated within the length of its value. */ + +#define FDT_ERR_BADOVERLAY 16 + /* FDT_ERR_BADOVERLAY: The device tree overlay, while + * correctly structured, cannot be applied due to some + * unexpected or missing value, property or node. */ + +#define FDT_ERR_NOPHANDLES 17 + /* FDT_ERR_NOPHANDLES: The device tree doesn't have any + * phandle available anymore without causing an overflow */ + +#define FDT_ERR_BADFLAGS 18 + /* FDT_ERR_BADFLAGS: The function was passed a flags field that + * contains invalid flags or an invalid combination of flags. */ + +#define FDT_ERR_MAX 18 + +/* constants */ +#define FDT_MAX_PHANDLE 0xfffffffe + /* Valid values for phandles range from 1 to 2^32-2. */ + +#ifdef __cplusplus +extern "C" { +#endif +/**********************************************************************/ +/* Low-level functions (you probably don't need these) */ +/**********************************************************************/ + +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen); +#endif +static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) +{ + return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); +} + +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); + +/* + * Alignment helpers: + * These helpers access words from a device tree blob. 
They're + * built to work even with unaligned pointers on platforms (like + * ARM) that don't like unaligned loads and stores + */ + +static inline uint32_t fdt32_ld(const fdt32_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint32_t)bp[0] << 24) + | ((uint32_t)bp[1] << 16) + | ((uint32_t)bp[2] << 8) + | bp[3]; +} + +static inline void fdt32_st(void *property, uint32_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 24; + bp[1] = (value >> 16) & 0xff; + bp[2] = (value >> 8) & 0xff; + bp[3] = value & 0xff; +} + +static inline uint64_t fdt64_ld(const fdt64_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint64_t)bp[0] << 56) + | ((uint64_t)bp[1] << 48) + | ((uint64_t)bp[2] << 40) + | ((uint64_t)bp[3] << 32) + | ((uint64_t)bp[4] << 24) + | ((uint64_t)bp[5] << 16) + | ((uint64_t)bp[6] << 8) + | bp[7]; +} + +static inline void fdt64_st(void *property, uint64_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 56; + bp[1] = (value >> 48) & 0xff; + bp[2] = (value >> 40) & 0xff; + bp[3] = (value >> 32) & 0xff; + bp[4] = (value >> 24) & 0xff; + bp[5] = (value >> 16) & 0xff; + bp[6] = (value >> 8) & 0xff; + bp[7] = value & 0xff; +} + +/**********************************************************************/ +/* Traversal functions */ +/**********************************************************************/ + +int fdt_next_node(const void *fdt, int offset, int *depth); + +/** + * fdt_first_subnode() - get offset of first direct subnode + * + * @fdt: FDT blob + * @offset: Offset of node to check + * @return offset of first subnode, or -FDT_ERR_NOTFOUND if there is none + */ +int fdt_first_subnode(const void *fdt, int offset); + +/** + * fdt_next_subnode() - get offset of next direct subnode + * + * After first calling fdt_first_subnode(), call this function repeatedly to + * get direct subnodes of a parent node.
+ * + * @fdt: FDT blob + * @offset: Offset of previous subnode + * @return offset of next subnode, or -FDT_ERR_NOTFOUND if there are no more + * subnodes + */ +int fdt_next_subnode(const void *fdt, int offset); + +/** + * fdt_for_each_subnode - iterate over all subnodes of a parent + * + * @node: child node (int, lvalue) + * @fdt: FDT blob (const void *) + * @parent: parent node (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_subnode(node, fdt, parent) { + * Use node + * ... + * } + * + * if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and @node is used as + * iterator in the loop. The parent variable can be constant or even a + * literal. + * + */ +#define fdt_for_each_subnode(node, fdt, parent) \ + for (node = fdt_first_subnode(fdt, parent); \ + node >= 0; \ + node = fdt_next_subnode(fdt, node)) + +/**********************************************************************/ +/* General functions */ +/**********************************************************************/ +#define fdt_get_header(fdt, field) \ + (fdt32_ld(&((const struct fdt_header *)(fdt))->field)) +#define fdt_magic(fdt) (fdt_get_header(fdt, magic)) +#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) +#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) +#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings)) +#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap)) +#define fdt_version(fdt) (fdt_get_header(fdt, version)) +#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version)) +#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys)) +#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings)) +#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct)) + +#define fdt_set_hdr_(name) \ + static inline void fdt_set_##name(void *fdt, uint32_t val) \ + { \ + struct
fdt_header *fdth = (struct fdt_header *)fdt; \ + fdth->name = cpu_to_fdt32(val); \ + } +fdt_set_hdr_(magic); +fdt_set_hdr_(totalsize); +fdt_set_hdr_(off_dt_struct); +fdt_set_hdr_(off_dt_strings); +fdt_set_hdr_(off_mem_rsvmap); +fdt_set_hdr_(version); +fdt_set_hdr_(last_comp_version); +fdt_set_hdr_(boot_cpuid_phys); +fdt_set_hdr_(size_dt_strings); +fdt_set_hdr_(size_dt_struct); +#undef fdt_set_hdr_ + +/** + * fdt_header_size - return the size of the tree's header + * @fdt: pointer to a flattened device tree + */ +size_t fdt_header_size_(uint32_t version); +static inline size_t fdt_header_size(const void *fdt) +{ + return fdt_header_size_(fdt_version(fdt)); +} + +/** + * fdt_check_header - sanity check a device tree header + + * @fdt: pointer to data which might be a flattened device tree + * + * fdt_check_header() checks that the given buffer contains what + * appears to be a flattened device tree, and that the header contains + * valid information (to the extent that can be determined from the + * header alone). + * + * returns: + * 0, if the buffer appears to contain a valid device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings, as above + */ +int fdt_check_header(const void *fdt); + +/** + * fdt_move - move a device tree around in memory + * @fdt: pointer to the device tree to move + * @buf: pointer to memory where the device is to be moved + * @bufsize: size of the memory space at buf + * + * fdt_move() relocates, if possible, the device tree blob located at + * fdt to the buffer at buf of size bufsize. The buffer may overlap + * with the existing device tree blob at fdt. Therefore, + * fdt_move(fdt, fdt, fdt_totalsize(fdt)) + * should always succeed. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_move(const void *fdt, void *buf, int bufsize); + +/**********************************************************************/ +/* Read-only functions */ +/**********************************************************************/ + +int fdt_check_full(const void *fdt, size_t bufsize); + +/** + * fdt_get_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * @lenp: optional pointer to return the string's length + * + * fdt_get_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt, and optionally also + * returns the string's length in *lenp. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_get_string(const void *fdt, int stroffset, int *lenp); + +/** + * fdt_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * + * fdt_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_string(const void *fdt, int stroffset); + +/** + * fdt_find_max_phandle - find and return the highest phandle in a tree + * @fdt: pointer to the device tree blob + * @phandle: return location for the highest phandle value found in the tree + * + * fdt_find_max_phandle() finds the highest phandle value in the given device + * tree. 
The value returned in @phandle is only valid if the function returns + * success. + * + * returns: + * 0 on success or a negative error code on failure + */ +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_get_max_phandle - retrieves the highest phandle in a tree + * @fdt: pointer to the device tree blob + * + * fdt_get_max_phandle retrieves the highest phandle in the given + * device tree. This will ignore badly formatted phandles, or phandles + * with a value of 0 or -1. + * + * This function is deprecated in favour of fdt_find_max_phandle(). + * + * returns: + * the highest phandle on success + * 0, if no phandle was found in the device tree + * -1, if an error occurred + */ +static inline uint32_t fdt_get_max_phandle(const void *fdt) +{ + uint32_t phandle; + int err; + + err = fdt_find_max_phandle(fdt, &phandle); + if (err < 0) + return (uint32_t)-1; + + return phandle; +} + +/** + * fdt_generate_phandle - return a new, unused phandle for a device tree blob + * @fdt: pointer to the device tree blob + * @phandle: return location for the new phandle + * + * Walks the device tree blob and looks for the highest phandle value. On + * success, the new, unused phandle value (one higher than the previously + * highest phandle value in the device tree blob) will be returned in the + * @phandle parameter. + * + * Returns: + * 0 on success or a negative error-code on failure + */ +int fdt_generate_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_num_mem_rsv - retrieve the number of memory reserve map entries + * @fdt: pointer to the device tree blob + * + * Returns the number of entries in the device tree blob's memory + * reservation map. This does not include the terminating 0,0 entry + * or any other (0,0) entries reserved for expansion. 
+ * + * returns: + * the number of entries + */ +int fdt_num_mem_rsv(const void *fdt); + +/** + * fdt_get_mem_rsv - retrieve one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: pointers to 64-bit variables + * + * On success, *address and *size will contain the address and size of + * the n-th reserve map entry from the device tree blob, in + * native-endian format. + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); + +/** + * fdt_subnode_offset_namelen - find a subnode based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_subnode_offset(), but only examine the first + * namelen characters of name for matching the subnode name. This is + * useful for finding subnodes based on a portion of a larger string, + * such as a full path. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, + const char *name, int namelen); +#endif +/** + * fdt_subnode_offset - find a subnode of a given node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_subnode_offset() finds a subnode of the node at structure block + * offset parentoffset with the given name. name may include a unit + * address, in which case fdt_subnode_offset() will find the subnode + * with that unit address, or the unit address may be omitted, in + * which case fdt_subnode_offset() will find an arbitrary subnode + * whose name excluding unit address matches the given name. 
+ * + * returns: + * structure block offset of the requested subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); + +/** + * fdt_path_offset_namelen - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * @namelen: number of characters of path to consider + * + * Identical to fdt_path_offset(), but only consider the first namelen + * characters of path as the path name. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen); +#endif + +/** + * fdt_path_offset - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * + * fdt_path_offset() finds a node of a given path in the device tree. + * Each path component may omit the unit address portion, but the + * results of this are undefined if any such path component is + * ambiguous (that is if there are multiple nodes at the relevant + * level matching the given component, differentiated only by unit + * address). + * + * returns: + * structure block offset of the node with the requested path (>=0), on + * success + * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid + * -FDT_ERR_NOTFOUND, if the requested node does not exist + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. 
+ */ +int fdt_path_offset(const void *fdt, const char *path); + +/** + * fdt_get_name - retrieve the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the starting node + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_name() retrieves the name (including unit address) of the + * device tree node at structure block offset nodeoffset. If lenp is + * non-NULL, the length of this name is also returned, in the integer + * pointed to by lenp. + * + * returns: + * pointer to the node's name, on success + * If lenp is non-NULL, *lenp contains the length of that name + * (>=0) + * NULL, on error + * if lenp is non-NULL *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp); + +/** + * fdt_first_property_offset - find the offset of a node's first property + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * + * fdt_first_property_offset() finds the first property of the node at + * the given structure block offset. + * + * returns: + * structure block offset of the property (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested node has no properties + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_first_property_offset(const void *fdt, int nodeoffset); + +/** + * fdt_next_property_offset - step through a node's properties + * @fdt: pointer to the device tree blob + * @offset: structure block offset of a property + * + * fdt_next_property_offset() finds the property immediately after the + * one at the given structure block offset. 
This will be a property + * of the same node as the given property. + * + * returns: + * structure block offset of the next property (>=0), on success + * -FDT_ERR_NOTFOUND, if the given property is the last in its node + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_next_property_offset(const void *fdt, int offset); + +/** + * fdt_for_each_property_offset - iterate over all properties of a node + * + * @property_offset: property offset (int, lvalue) + * @fdt: FDT blob (const void *) + * @node: node offset (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_property_offset(property, fdt, node) { + * Use property + * ... + * } + * + * if ((property < 0) && (property != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and property is used as + * iterator in the loop. The node variable can be constant or even a + * literal. + */ +#define fdt_for_each_property_offset(property, fdt, node) \ + for (property = fdt_first_property_offset(fdt, node); \ + property >= 0; \ + property = fdt_next_property_offset(fdt, property)) + +/** + * fdt_get_property_by_offset - retrieve the property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to retrieve + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property_by_offset() retrieves a pointer to the + * fdt_property structure within the device tree blob at the given + * offset. If lenp is non-NULL, the length of the property value is + * also returned, in the integer pointed to by lenp. + * + * Note that this code only works on device tree versions >= 16. fdt_getprop() + * works on all versions. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp); + +/** + * fdt_get_property_namelen - find a property based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_get_property(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int nodeoffset, + const char *name, + int namelen, int *lenp); +#endif + +/** + * fdt_get_property - find a given property in a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property() retrieves a pointer to the fdt_property + * structure within the device tree blob corresponding to the property + * named 'name' of the node at offset nodeoffset. If lenp is + * non-NULL, the length of the property value is also returned, in the + * integer pointed to by lenp. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, + const char *name, + int *lenp) +{ + return (struct fdt_property *)(uintptr_t) + fdt_get_property(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_getprop_by_offset - retrieve the value of a property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to read + * @namep: pointer to a string variable (will be overwritten) or NULL + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop_by_offset() retrieves a pointer to the value of the + * property at structure block offset 'offset' (this will be a pointer + * to within the device blob itself, not a copy of the value). If + * lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. If namep is non-NULL, + * the property's name will also be returned in the char * pointed to + * by namep (this will be a pointer to within the device tree's string + * block, not a new copy of the name). + * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * if namep is non-NULL *namep contains a pointer to the property + * name.
+ * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp); +#endif + +/** + * fdt_getprop_namelen - get property value based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_getprop(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp); +static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset, + const char *name, int namelen, + int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name, + namelen, lenp); +} +#endif + +/** + * fdt_getprop - retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop() retrieves a pointer to the value of the property + * named 'name' of the node at offset nodeoffset (this will be a + * pointer to within the device blob itself, not a copy of the value). + * If lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. 
+ * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline void *fdt_getprop_w(void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_get_phandle - retrieve the phandle of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the node + * + * fdt_get_phandle() retrieves the phandle of the device tree node at + * structure block offset nodeoffset. + * + * returns: + * the phandle of the node at nodeoffset, on success (!= 0, != -1) + * 0, if the node has no phandle, or another error occurs + */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); + +/** + * fdt_get_alias_namelen - get alias based on substring + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * @namelen: number of characters of name to consider + * + * Identical to fdt_get_alias(), but only examine the first namelen + * characters of name for matching the alias name. + */ +#ifndef SWIG /* Not available in Python */ +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen); +#endif + +/** + * fdt_get_alias - retrieve the path referenced by a given alias + * @fdt: pointer to the device tree blob + * @name: name of the alias to look up + * + * fdt_get_alias() retrieves the value of a given alias. That is, the + * value of the property named 'name' in the node /aliases.
+ * + * returns: + * a pointer to the expansion of the alias named 'name', if it exists + * NULL, if the given alias or the /aliases node does not exist + */ +const char *fdt_get_alias(const void *fdt, const char *name); + +/** + * fdt_get_path - determine the full path of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose path to find + * @buf: character buffer to contain the returned path (will be overwritten) + * @buflen: size of the character buffer at buf + * + * fdt_get_path() computes the full path of the node at offset + * nodeoffset, and records that path in the buffer at buf. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * 0, on success + * buf contains the absolute path of the node at + * nodeoffset, as a NUL-terminated string. + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOSPACE, the path of the given node is longer than (bufsize-1) + * characters and will not fit in the given buffer. + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen); + +/** + * fdt_supernode_atdepth_offset - find a specific ancestor of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * @supernodedepth: depth of the ancestor to find + * @nodedepth: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_supernode_atdepth_offset() finds an ancestor of the given node + * at a specific depth from the root (where the root itself has depth + * 0, its immediate subnodes depth 1 and so forth). So + * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL); + * will always return 0, the offset of the root node. 
If the node at + * nodeoffset has depth D, then: + * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL); + * will return nodeoffset itself. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * structure block offset of the node at node offset's ancestor + * of depth supernodedepth (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of + * nodeoffset + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth); + +/** + * fdt_node_depth - find the depth of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose depth to find + * + * fdt_node_depth() finds the depth of a given node. The root node + * has depth 0, its immediate subnodes depth 1 and so forth. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * depth of the node at nodeoffset (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_depth(const void *fdt, int nodeoffset); + +/** + * fdt_parent_offset - find the parent of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_parent_offset() locates the parent node of a given node (that + * is, it finds the offset of the node which contains the node at + * nodeoffset as a subnode). + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset, *twice*.
+ * + * returns: + * structure block offset of the parent of the node at nodeoffset + * (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_parent_offset(const void *fdt, int nodeoffset); + +/** + * fdt_node_offset_by_prop_value - find nodes with a given property value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @propname: property name to check + * @propval: property value to search for + * @proplen: length of the value in propval + * + * fdt_node_offset_by_prop_value() returns the offset of the first + * node after startoffset, which has a property named propname whose + * value is of length proplen and has value equal to propval; or if + * startoffset is -1, the very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_prop_value(fdt, -1, propname, + * propval, proplen); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_prop_value(fdt, offset, propname, + * propval, proplen); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen); + +/** + * fdt_node_offset_by_phandle - find the node with a given phandle + * @fdt: pointer to the device tree blob + * @phandle: phandle value + * + * fdt_node_offset_by_phandle() returns the offset of the node + * which has the given phandle value. If there is more than one node + * in the tree with the given phandle (an invalid tree), results are + * undefined. + * + * returns: + * structure block offset of the located node (>= 0), on success + * -FDT_ERR_NOTFOUND, no node with that phandle exists + * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle); + +/** + * fdt_node_check_compatible: check a node's compatible property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @compatible: string to match against + * + * + * fdt_node_check_compatible() returns 0 if the given node contains a + * 'compatible' property with the given string as one of its elements, + * it returns non-zero otherwise, or on error. 
+ * + * returns: + * 0, if the node has a 'compatible' property listing the given string + * 1, if the node has a 'compatible' property, but it does not list + * the given string + * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property + * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible); + +/** + * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @compatible: 'compatible' string to match against + * + * fdt_node_offset_by_compatible() returns the offset of the first + * node after startoffset, which has a 'compatible' property which + * lists the given compatible string; or if startoffset is -1, the + * very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_compatible(fdt, -1, compatible); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_compatible(fdt, offset, compatible); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, startoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible); + +/** + * fdt_stringlist_contains - check a string list property for a string + * @strlist: Property containing a list of strings to check + * @listlen: Length of property + * @str: String to search for + * + * This is a utility function provided for convenience. The list contains + * one or more strings, each terminated by \0, as is found in a device tree + * "compatible" property. + * + * @return: 1 if the string is found in the list, 0 if it is not found or the list is invalid + */ +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str); + +/** + * fdt_stringlist_count - count the number of strings in a string list + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @return: + * the number of strings in the given property + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property); + +/** + * fdt_stringlist_search - find a string in a string list and return its index + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @string: string to look up in the string list + * + * Note that it is possible for this function to succeed on property values + * that are not NUL-terminated.
That's because the function will stop after + * finding the first occurrence of @string. This can for example happen with + * small-valued cell properties, such as #address-cells, when searching for + * the empty string. + * + * @return: + * the index of the string in the list of strings + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist or does not contain + * the given string + */ +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string); + +/** + * fdt_stringlist_get() - obtain the string at a given index in a string list + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @index: index of the string to return + * @lenp: return location for the string length or an error code on failure + * + * Note that this will successfully extract strings from properties with + * non-NUL-terminated values. For example on small-valued cell properties + * this function will return the empty string. + * + * If non-NULL, the length of the string (on success) or a negative error-code + * (on failure) will be stored in the integer pointed to by lenp. + * + * @return: + * A pointer to the string at the given index in the string list or NULL on + * failure. On success the length of the string will be stored in the memory + * location pointed to by the lenp parameter, if non-NULL.
On failure one of + * the following negative error codes will be returned in the lenp parameter + * (if non-NULL): + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int index, + int *lenp); + +/**********************************************************************/ +/* Read-only functions (addressing related) */ +/**********************************************************************/ + +/** + * FDT_MAX_NCELLS - maximum value for #address-cells and #size-cells + * + * This is the maximum value for #address-cells, #size-cells and + * similar properties that will be processed by libfdt. IEEE 1275 + * requires that OF implementations handle values up to 4. + * Implementations may support larger values, but in practice higher + * values aren't used. + */ +#define FDT_MAX_NCELLS 4 + +/** + * fdt_address_cells - retrieve address size for a bus represented in the tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address size for + * + * When the node has a valid #address-cells property, returns its value. + * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 2, if the node has no #address-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_address_cells(const void *fdt, int nodeoffset); + +/** + * fdt_size_cells - retrieve address range size for a bus represented in the + * tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address range size for + * + * When the node has a valid #size-cells property, returns its value.
+ * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 1, if the node has no #size-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #size-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_size_cells(const void *fdt, int nodeoffset); + + +/**********************************************************************/ +/* Write-in-place functions */ +/**********************************************************************/ + +/** + * fdt_setprop_inplace_namelen_partial - change a property's value, + * but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @namelen: number of characters of name to consider + * @idx: index of the property to change in the array + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * Identical to fdt_setprop_inplace(), but modifies the given property + * starting from the given index, and using only the first characters + * of the name. It is useful when you want to manipulate only one value of + * an array and you have a string that doesn't end with \0. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len); +#endif + +/** + * fdt_setprop_inplace - change a property's value, but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * fdt_setprop_inplace() replaces the value of a given property with + * the data in val, of length len. 
This function cannot change the + * size of a property, and so will only work if len is equal to the + * current length of the property. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if len is not equal to the property's current length + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len); +#endif + +/** + * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to replace the property with + * + * fdt_setprop_inplace_u32() replaces the value of a given property + * with the 32-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 4. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 4 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + /* Convert to big-endian as stored in the blob; the in-place write only + * succeeds when sizeof(tmp) matches the property's current length. */ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_u64 - change the value of a 64-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to replace the property with + * + * fdt_setprop_inplace_u64() replaces the value of a given property + * with the 64-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 8. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 8 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + /* Convert to big-endian as stored in the blob; the in-place write only + * succeeds when sizeof(tmp) matches the property's current length. */ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_cell - change the value of a single-cell property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to replace the property with + * + * This is an alternative name for fdt_setprop_inplace_u32(); all + * arguments are forwarded unchanged and its result is returned. + */ +static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_setprop_inplace_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_nop_property - replace a property with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_nop_property() will replace a given property's representation + * in the blob with FDT_NOP tags, effectively removing it from the + * tree. + * + * This function will alter only the bytes in the blob which contain + * the property, and will not alter or move any other part of the + * tree.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_property(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_nop_node - replace a node (subtree) with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_nop_node() will replace a given node's representation in the + * blob, including all its subnodes, if any, with FDT_NOP tags, + * effectively removing it from the tree. + * + * This function will alter only the bytes in the blob which contain + * the node and its properties and subnodes, and will not alter or + * move any other part of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Sequential write functions */ +/**********************************************************************/ + +/* fdt_create_with_flags flags */ +#define FDT_CREATE_FLAG_NO_NAME_DEDUP 0x1 + /* FDT_CREATE_FLAG_NO_NAME_DEDUP: Do not try to de-duplicate property + * names in the fdt. This can result in faster creation times, but + * a larger fdt. */ + +#define FDT_CREATE_FLAGS_ALL (FDT_CREATE_FLAG_NO_NAME_DEDUP) + +/** + * fdt_create_with_flags - begin creation of a new fdt + * @buf: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at buf + * @flags: a valid combination of FDT_CREATE_FLAG_ flags, or 0.
+ * + * fdt_create_with_flags() begins the process of creating a new fdt with + * the sequential write interface. + * + * fdt creation process must end with fdt_finish() to produce a valid fdt. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + * -FDT_ERR_BADFLAGS, flags is not valid + */ +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags); + +/** + * fdt_create - begin creation of a new fdt + * @buf: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at buf + * + * fdt_create() is equivalent to fdt_create_with_flags() with flags=0. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + */ +int fdt_create(void *buf, int bufsize); + +int fdt_resize(void *fdt, void *buf, int bufsize); +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size); +int fdt_finish_reservemap(void *fdt); +int fdt_begin_node(void *fdt, const char *name); +int fdt_property(void *fdt, const char *name, const void *val, int len); +/* fdt_property_u32 - call fdt_property() with val converted by + * cpu_to_fdt32() and a length of sizeof(fdt32_t). */ +static inline int fdt_property_u32(void *fdt, const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} +/* fdt_property_u64 - 64-bit counterpart: converts with cpu_to_fdt64() and + * passes a length of sizeof(fdt64_t). */ +static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} + +#ifndef SWIG /* Not available in Python */ +/* fdt_property_cell - alternative name for fdt_property_u32() */ +static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val) +{ + return fdt_property_u32(fdt, name, val); +} +#endif + +/** + * fdt_property_placeholder - add a new property and return a ptr to its value + * + * @fdt: pointer to the device tree blob + * @name: name of property to add + * @len: length of property value in bytes + * @valp: returns a pointer to where the value should be placed + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_NOSPACE, standard meanings + */
+int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp); + +/* fdt_property_string - call fdt_property() with str as the value and a + * length of strlen(str)+1, i.e. including the terminating NUL. This is a + * macro and str is expanded twice, so it must not have side effects. */ +#define fdt_property_string(fdt, name, str) \ + fdt_property(fdt, name, str, strlen(str)+1) +int fdt_end_node(void *fdt); +int fdt_finish(void *fdt); + +/**********************************************************************/ +/* Read-write functions */ +/**********************************************************************/ + +int fdt_create_empty_tree(void *buf, int bufsize); +int fdt_open_into(const void *fdt, void *buf, int bufsize); +int fdt_pack(void *fdt); + +/** + * fdt_add_mem_rsv - add one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: 64-bit values (native endian) + * + * Adds a reserve map entry to the given blob reserving a region at + * address address of length size. + * + * This function will insert data into the reserve map and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new reservation entry + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size); + +/** + * fdt_del_mem_rsv - remove a memory reserve map entry + * @fdt: pointer to the device tree blob + * @n: entry to remove + * + * fdt_del_mem_rsv() removes the n-th memory reserve map entry from + * the blob. + * + * This function will delete data from the reservation table and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, there is no entry of the given index (i.e.
there + * are less than n+1 reserve map entries) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_mem_rsv(void *fdt, int n); + +/** + * fdt_set_name - change the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * @name: name to give the node + * + * fdt_set_name() replaces the name (including unit address, if any) + * of the given node with the given string. NOTE: this function can't + * efficiently check if the new name is unique amongst the given + * node's siblings; results are undefined if this function is invoked + * with a name equal to one of the given node's siblings. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob + * to contain the new name + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_set_name(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_setprop - create or change a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to set the property value to + * @len: length of the property value + * + * fdt_setprop() sets the value of the named property in the given + * node to the given value and length, creating the property if it + * does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_setprop_placeholder - allocate space for a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @len: length of the property value + * @prop_data: return pointer to property data + * + * fdt_setprop_placeholder() allocates the named property in the given node. + * If the property exists it is resized. In either case a pointer to the + * property data is returned. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data); + +/** + * fdt_setprop_u32 - set a property to a 32-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * fdt_setprop_u32() sets the value of the named property in the given + * node to the given 32-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u32(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + /* Convert to big-endian as stored in the blob before handing the + * value to fdt_setprop(). */ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_u64 - set a property to a 64-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value for the property (native endian) + * + * fdt_setprop_u64() sets the value of the named property in the given + * node to the given 64-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u64(void *fdt, int nodeoffset, const char *name, + uint64_t val) +{ + /* Convert to big-endian as stored in the blob before handing the + * value to fdt_setprop(). */ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_cell - set a property to a single cell value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * This is an alternative name for fdt_setprop_u32(); all arguments are + * forwarded unchanged and its result is returned. + */ +static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + return fdt_setprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_setprop_string - set a property to a string value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value for the property + * + * fdt_setprop_string() sets the value of the named property in the + * given node to the given string value (using the length of the + * string to determine the new length of the property), or creates a + * new property with that value if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + * + * NOTE: this is a macro; str is expanded twice, and the length passed to + * fdt_setprop() is strlen(str)+1, so the terminating NUL is included. + */ +#define fdt_setprop_string(fdt, nodeoffset, name, str) \ + fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + + +/** + * fdt_setprop_empty - set a property to an empty value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * + * fdt_setprop_empty() sets the value of the named property in the + * given node to an empty (zero length) value, or creates a new empty + * property if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + * + * NOTE: this is a macro that calls fdt_setprop() with a NULL value + * pointer and a length of 0. + */ +#define fdt_setprop_empty(fdt, nodeoffset, name) \ + fdt_setprop((fdt), (nodeoffset), (name), NULL, 0) + +/** + * fdt_appendprop - append to or create a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to append to + * @val: pointer to data to append to the property value + * @len: length of the data to append to the property value + * + * fdt_appendprop() appends the value to the named property in the + * given node, creating the property if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_appendprop_u32 - append a 32-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u32() appends the given 32-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + /* Convert to big-endian as stored in the blob before handing the + * value to fdt_appendprop(). */ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_u64 - append a 64-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u64() appends the given 64-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + /* Convert to big-endian as stored in the blob before handing the + * value to fdt_appendprop(). */ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_cell - append a single cell value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to append to the property (native endian) + * + * This is an alternative name for fdt_appendprop_u32(); all arguments + * are forwarded unchanged and its result is returned. + */ +static inline int fdt_appendprop_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_appendprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_appendprop_string - append a string to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value to append to the property + * + * fdt_appendprop_string() appends the given string to the value of + * the named property in the given node, or creates a new property + * with that value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + * + * NOTE: this is a macro; str is expanded twice, and the length passed to + * fdt_appendprop() is strlen(str)+1, so the terminating NUL is included. + */ +#define fdt_appendprop_string(fdt, nodeoffset, name, str) \ + fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_appendprop_addrrange - append an address range property + * @fdt: pointer to the device tree blob + * @parent: offset of the parent node + * @nodeoffset: offset of the node to add a property at + * @name: name of property + * @addr: start address of a given range + * @size: size of a given range + * + * fdt_appendprop_addrrange() appends an address range value (start + * address and size) to the value of the named property in the given + * node, or creates a new property with that value if it does not + * already exist. + * If "name" is not specified, a default "reg" is used. + * Cell sizes are determined by parent's #address-cells and #size-cells. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADVALUE, addr or size doesn't fit in the respective cell size + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain a new property + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size); + +/** + * fdt_delprop - delete a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to delete + * @name: name of the property to delete + * + * fdt_delprop() will delete the given property. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_delprop(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_add_subnode_namelen - creates a new node based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to create + * @namelen: number of characters of name to consider + * + * Identical to fdt_add_subnode(), but uses only the first namelen + * characters of name as the name of the new node. This is useful for + * creating subnodes based on a portion of a larger string, such as a + * full path.
+ */ +#ifndef SWIG /* Not available in Python */ +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen); +#endif + +/** + * fdt_add_subnode - creates a new node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to create + * + * fdt_add_subnode() creates a new node as a subnode of the node at + * structure block offset parentoffset, with the given name (which + * should include the unit address, if any). + * + * This function will insert data into the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * structure block offset of the created subnode (>=0), on + * success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of + * the given name + * -FDT_ERR_NOSPACE, if there is insufficient free space in the + * blob to contain the new node + * -FDT_ERR_NOSPACE + * -FDT_ERR_BADLAYOUT + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_add_subnode(void *fdt, int parentoffset, const char *name); + +/** + * fdt_del_node - delete a node (subtree) + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to delete + * + * fdt_del_node() will remove the given node, including all its + * subnodes if any, from the blob. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_node(void *fdt, int nodeoffset); + +/** + * fdt_overlay_apply - Applies a DT overlay on a base DT + * @fdt: pointer to the base device tree blob + * @fdto: pointer to the device tree overlay blob + * + * fdt_overlay_apply() will apply the given device tree overlay on the + * given base device tree. + * + * Expect the base device tree to be modified, even if the function + * returns an error. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there's not enough space in the base device tree + * -FDT_ERR_NOTFOUND, the overlay points to some inexistant nodes or + * properties in the base DT + * -FDT_ERR_BADPHANDLE, + * -FDT_ERR_BADOVERLAY, + * -FDT_ERR_NOPHANDLES, + * -FDT_ERR_INTERNAL, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADOFFSET, + * -FDT_ERR_BADPATH, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_overlay_apply(void *fdt, void *fdto); + +/**********************************************************************/ +/* Debugging / informational functions */ +/**********************************************************************/ + +const char *fdt_strerror(int errval); +#ifdef __cplusplus +} +#endif + +#endif /* LIBFDT_H */ diff --git a/fdt/libfdt_env.h b/fdt/libfdt_env.h new file mode 100644 index 0000000000..2363810c17 --- /dev/null +++ b/fdt/libfdt_env.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_ENV_H +#define LIBFDT_ENV_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. 
+ */ + +#include +#include +#include +#include + +#define INT_MAX INT32_MAX +#define UINT_MAX UINT32_MAX + +#ifdef __CHECKER__ +#define FDT_FORCE __attribute__((force)) +#define FDT_BITWISE __attribute__((bitwise)) +#else +#define FDT_FORCE +#define FDT_BITWISE +#endif + +typedef uint16_t FDT_BITWISE fdt16_t; +typedef uint32_t FDT_BITWISE fdt32_t; +typedef uint64_t FDT_BITWISE fdt64_t; + +#define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n]) +#define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1)) +#define CPU_TO_FDT32(x) ((EXTRACT_BYTE(x, 0) << 24) | (EXTRACT_BYTE(x, 1) << 16) | \ + (EXTRACT_BYTE(x, 2) << 8) | EXTRACT_BYTE(x, 3)) +#define CPU_TO_FDT64(x) ((EXTRACT_BYTE(x, 0) << 56) | (EXTRACT_BYTE(x, 1) << 48) | \ + (EXTRACT_BYTE(x, 2) << 40) | (EXTRACT_BYTE(x, 3) << 32) | \ + (EXTRACT_BYTE(x, 4) << 24) | (EXTRACT_BYTE(x, 5) << 16) | \ + (EXTRACT_BYTE(x, 6) << 8) | EXTRACT_BYTE(x, 7)) + +static inline uint16_t fdt16_to_cpu(fdt16_t x) +{ + return (FDT_FORCE uint16_t)CPU_TO_FDT16(x); +} +static inline fdt16_t cpu_to_fdt16(uint16_t x) +{ + return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x); +} + +static inline uint32_t fdt32_to_cpu(fdt32_t x) +{ + return (FDT_FORCE uint32_t)CPU_TO_FDT32(x); +} +static inline fdt32_t cpu_to_fdt32(uint32_t x) +{ + return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x); +} + +static inline uint64_t fdt64_to_cpu(fdt64_t x) +{ + return (FDT_FORCE uint64_t)CPU_TO_FDT64(x); +} +static inline fdt64_t cpu_to_fdt64(uint64_t x) +{ + return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x); +} +#undef CPU_TO_FDT64 +#undef CPU_TO_FDT32 +#undef CPU_TO_FDT16 +#undef EXTRACT_BYTE + +#ifdef __APPLE__ +#include + +/* strnlen() is not available on Mac OS < 10.7 */ +# if !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < \ + MAC_OS_X_VERSION_10_7) + +#define strnlen fdt_strnlen + +/* + * fdt_strnlen: returns the length of a string or max_count - which ever is + * smallest. 
+ * Input 1 string: the string whose size is to be determined + * Input 2 max_count: the maximum value returned by this function + * Output: length of the string or max_count (the smallest of the two) + */ +static inline size_t fdt_strnlen(const char *string, size_t max_count) +{ + const char *p = memchr(string, 0, max_count); + return p ? p - string : max_count; +} + +#endif /* !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < + MAC_OS_X_VERSION_10_7) */ + +#endif /* __APPLE__ */ + +#endif /* LIBFDT_ENV_H */ diff --git a/fdt/libfdt_internal.h b/fdt/libfdt_internal.h new file mode 100644 index 0000000000..741eeb3150 --- /dev/null +++ b/fdt/libfdt_internal.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_INTERNAL_H +#define LIBFDT_INTERNAL_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +int fdt_ro_probe_(const void *fdt); +#define FDT_RO_PROBE(fdt) \ + { \ + int totalsize_; \ + if ((totalsize_ = fdt_ro_probe_(fdt)) < 0) \ + return totalsize_; \ + } + +int fdt_check_node_offset_(const void *fdt, int offset); +int fdt_check_prop_offset_(const void *fdt, int offset); +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s); +int fdt_node_end_offset_(void *fdt, int nodeoffset); + +static inline const void *fdt_offset_ptr_(const void *fdt, int offset) +{ + return (const char *)fdt + fdt_off_dt_struct(fdt) + offset; +} + +static inline void *fdt_offset_ptr_w_(void *fdt, int offset) +{ + return (void *)(uintptr_t)fdt_offset_ptr_(fdt, offset); +} + +static inline const struct fdt_reserve_entry *fdt_mem_rsv_(const void *fdt, int n) +{ + const struct fdt_reserve_entry *rsv_table = + (const struct fdt_reserve_entry *) + ((const char *)fdt + fdt_off_mem_rsvmap(fdt)); + + return rsv_table + n; +} +static 
inline struct fdt_reserve_entry *fdt_mem_rsv_w_(void *fdt, int n) +{ + return (void *)(uintptr_t)fdt_mem_rsv_(fdt, n); +} + +#define FDT_SW_MAGIC (~FDT_MAGIC) + +#endif /* LIBFDT_INTERNAL_H */ diff --git a/fesvr/context.cc b/fesvr/context.cc new file mode 100644 index 0000000000..ca73813768 --- /dev/null +++ b/fesvr/context.cc @@ -0,0 +1,115 @@ +#include "context.h" +#include +#include +#include + +static __thread context_t* cur; + +context_t::context_t() + : creator(NULL), func(NULL), arg(NULL), +#ifndef USE_UCONTEXT + mutex(PTHREAD_MUTEX_INITIALIZER), + cond(PTHREAD_COND_INITIALIZER), flag(0) +#else + context(new ucontext_t) +#endif +{ +} + +#ifdef USE_UCONTEXT +#ifndef GLIBC_64BIT_PTR_BUG +void context_t::wrapper(context_t* ctx) +{ +#else +void context_t::wrapper(unsigned int hi, unsigned int lo) +{ + context_t* ctx = reinterpret_cast(static_cast(lo) | (static_cast(hi) << 32)); +#endif + ctx->creator->switch_to(); + ctx->func(ctx->arg); +} +#else +void* context_t::wrapper(void* a) +{ + context_t* ctx = static_cast(a); + cur = ctx; + ctx->creator->switch_to(); + + ctx->func(ctx->arg); + return NULL; +} +#endif + +void context_t::init(void (*f)(void*), void* a) +{ + func = f; + arg = a; + creator = current(); + +#ifdef USE_UCONTEXT + getcontext(context.get()); + context->uc_link = creator->context.get(); + context->uc_stack.ss_size = 64*1024; + context->uc_stack.ss_sp = new void*[context->uc_stack.ss_size/sizeof(void*)]; +#ifndef GLIBC_64BIT_PTR_BUG + makecontext(context.get(), (void(*)(void))&context_t::wrapper, 1, this); +#else + unsigned int hi(reinterpret_cast(this) >> 32); + unsigned int lo(reinterpret_cast(this)); + makecontext(context.get(), (void(*)(void))&context_t::wrapper, 2, hi, lo); +#endif + switch_to(); +#else + assert(flag == 0); + + pthread_mutex_lock(&creator->mutex); + creator->flag = 0; + if (pthread_create(&thread, NULL, &context_t::wrapper, this) != 0) + abort(); + pthread_detach(thread); + while (!creator->flag) + 
pthread_cond_wait(&creator->cond, &creator->mutex); + pthread_mutex_unlock(&creator->mutex); +#endif +} + +context_t::~context_t() +{ + assert(this != cur); +} + +void context_t::switch_to() +{ + assert(this != cur); +#ifdef USE_UCONTEXT + context_t* prev = cur; + cur = this; + if (swapcontext(prev->context.get(), context.get()) != 0) + abort(); +#else + cur->flag = 0; + this->flag = 1; + pthread_mutex_lock(&this->mutex); + pthread_cond_signal(&this->cond); + pthread_mutex_unlock(&this->mutex); + pthread_mutex_lock(&cur->mutex); + while (!cur->flag) + pthread_cond_wait(&cur->cond, &cur->mutex); + pthread_mutex_unlock(&cur->mutex); +#endif +} + +context_t* context_t::current() +{ + if (cur == NULL) + { + cur = new context_t; +#ifdef USE_UCONTEXT + getcontext(cur->context.get()); +#else + cur->thread = pthread_self(); + cur->flag = 1; +#endif + } + return cur; +} diff --git a/fesvr/context.h b/fesvr/context.h new file mode 100644 index 0000000000..18bf50ef8d --- /dev/null +++ b/fesvr/context.h @@ -0,0 +1,54 @@ +#ifndef _HTIF_CONTEXT_H +#define _HTIF_CONTEXT_H + +// A replacement for ucontext.h, which is sadly deprecated. 
+ +#include + +#if defined(__GLIBC__) +# undef USE_UCONTEXT +# define USE_UCONTEXT +# include +# include +#include + +#if (ULONG_MAX > UINT_MAX) // 64-bit systems only +#if (100*GLIB_MAJOR_VERSION+GLIB_MINOR_VERSION < 208) +#define GLIBC_64BIT_PTR_BUG +static_assert (sizeof(unsigned int) == 4, "uint size doesn't match expected 32bit"); +static_assert (sizeof(unsigned long) == 8, "ulong size doesn't match expected 64bit"); +static_assert (sizeof(void*) == 8, "ptr size doesn't match expected 64bit"); +#endif +#endif /* ULONG_MAX > UINT_MAX */ + +#endif + +class context_t +{ + public: + context_t(); + ~context_t(); + void init(void (*func)(void*), void* arg); + void switch_to(); + static context_t* current(); + private: + context_t* creator; + void (*func)(void*); + void* arg; +#ifdef USE_UCONTEXT + std::unique_ptr context; +#ifndef GLIBC_64BIT_PTR_BUG + static void wrapper(context_t*); +#else + static void wrapper(unsigned int, unsigned int); +#endif +#else + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + volatile int flag; + static void* wrapper(void*); +#endif +}; + +#endif diff --git a/fesvr/debug_defines.h b/fesvr/debug_defines.h new file mode 100644 index 0000000000..e5f9291058 --- /dev/null +++ b/fesvr/debug_defines.h @@ -0,0 +1,1418 @@ +#define DTM_IDCODE 0x01 +/* +* Identifies the release version of this part. + */ +#define DTM_IDCODE_VERSION_OFFSET 28 +#define DTM_IDCODE_VERSION_LENGTH 4 +#define DTM_IDCODE_VERSION (0xf << DTM_IDCODE_VERSION_OFFSET) +/* +* Identifies the designer's part number of this part. + */ +#define DTM_IDCODE_PARTNUMBER_OFFSET 12 +#define DTM_IDCODE_PARTNUMBER_LENGTH 16 +#define DTM_IDCODE_PARTNUMBER (0xffff << DTM_IDCODE_PARTNUMBER_OFFSET) +/* +* Identifies the designer/manufacturer of this part. Bits 6:0 must be +* bits 6:0 of the designer/manufacturer's Identification Code as +* assigned by JEDEC Standard JEP106. 
Bits 10:7 contain the modulo-16 +* count of the number of continuation characters (0x7f) in that same +* Identification Code. + */ +#define DTM_IDCODE_MANUFID_OFFSET 1 +#define DTM_IDCODE_MANUFID_LENGTH 11 +#define DTM_IDCODE_MANUFID (0x7ff << DTM_IDCODE_MANUFID_OFFSET) +#define DTM_IDCODE_1_OFFSET 0 +#define DTM_IDCODE_1_LENGTH 1 +#define DTM_IDCODE_1 (0x1 << DTM_IDCODE_1_OFFSET) +#define DTM_DTMCS 0x10 +/* +* Writing 1 to this bit does a hard reset of the DTM, +* causing the DTM to forget about any outstanding DMI transactions. +* In general this should only be used when the Debugger has +* reason to expect that the outstanding DMI transaction will never +* complete (e.g. a reset condition caused an inflight DMI transaction to +* be cancelled). + */ +#define DTM_DTMCS_DMIHARDRESET_OFFSET 17 +#define DTM_DTMCS_DMIHARDRESET_LENGTH 1 +#define DTM_DTMCS_DMIHARDRESET (0x1 << DTM_DTMCS_DMIHARDRESET_OFFSET) +/* +* Writing 1 to this bit clears the sticky error state +* and allows the DTM to retry or complete the previous +* transaction. + */ +#define DTM_DTMCS_DMIRESET_OFFSET 16 +#define DTM_DTMCS_DMIRESET_LENGTH 1 +#define DTM_DTMCS_DMIRESET (0x1 << DTM_DTMCS_DMIRESET_OFFSET) +/* +* This is a hint to the debugger of the minimum number of +* cycles a debugger should spend in +* Run-Test/Idle after every DMI scan to avoid a `busy' +* return code (\Fdmistat of 3). A debugger must still +* check \Fdmistat when necessary. +* +* 0: It is not necessary to enter Run-Test/Idle at all. +* +* 1: Enter Run-Test/Idle and leave it immediately. +* +* 2: Enter Run-Test/Idle and stay there for 1 cycle before leaving. +* +* And so on. + */ +#define DTM_DTMCS_IDLE_OFFSET 12 +#define DTM_DTMCS_IDLE_LENGTH 3 +#define DTM_DTMCS_IDLE (0x7 << DTM_DTMCS_IDLE_OFFSET) +/* +* 0: No error. +* +* 1: Reserved. Interpret the same as 2. +* +* 2: An operation failed (resulted in \Fop of 2). +* +* 3: An operation was attempted while a DMI access was still in +* progress (resulted in \Fop of 3). 
+ */ +#define DTM_DTMCS_DMISTAT_OFFSET 10 +#define DTM_DTMCS_DMISTAT_LENGTH 2 +#define DTM_DTMCS_DMISTAT (0x3 << DTM_DTMCS_DMISTAT_OFFSET) +/* +* The size of \Faddress in \Rdmi. + */ +#define DTM_DTMCS_ABITS_OFFSET 4 +#define DTM_DTMCS_ABITS_LENGTH 6 +#define DTM_DTMCS_ABITS (0x3f << DTM_DTMCS_ABITS_OFFSET) +/* +* 0: Version described in spec version 0.11. +* +* 1: Version described in spec version 0.13 (and later?), which +* reduces the DMI data width to 32 bits. +* +* Other values are reserved for future use. + */ +#define DTM_DTMCS_VERSION_OFFSET 0 +#define DTM_DTMCS_VERSION_LENGTH 4 +#define DTM_DTMCS_VERSION (0xf << DTM_DTMCS_VERSION_OFFSET) +#define DTM_DMI 0x11 +/* +* Address used for DMI access. In Update-DR this value is used +* to access the DM over the DMI. + */ +#define DTM_DMI_ADDRESS_OFFSET 34 +#define DTM_DMI_ADDRESS_LENGTH abits +#define DTM_DMI_ADDRESS (((1L< +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +device_t::device_t() + : command_handlers(command_t::MAX_COMMANDS), + command_names(command_t::MAX_COMMANDS) +{ + for (size_t cmd = 0; cmd < command_t::MAX_COMMANDS; cmd++) + register_command(cmd, std::bind(&device_t::handle_null_command, this, _1), ""); + register_command(command_t::MAX_COMMANDS-1, std::bind(&device_t::handle_identify, this, _1), "identity"); +} + +void device_t::register_command(size_t cmd, command_func_t handler, const char* name) +{ + assert(cmd < command_t::MAX_COMMANDS); + assert(strlen(name) < IDENTITY_SIZE); + command_handlers[cmd] = handler; + command_names[cmd] = name; +} + +void device_t::handle_command(command_t cmd) +{ + command_handlers[cmd.cmd()](cmd); +} + +void device_t::handle_null_command(command_t cmd) +{ +} + +void device_t::handle_identify(command_t cmd) +{ + size_t what = cmd.payload() % command_t::MAX_COMMANDS; + uint64_t addr = cmd.payload() / command_t::MAX_COMMANDS; + assert(addr % IDENTITY_SIZE == 0); + + char id[IDENTITY_SIZE] = {0}; + if 
(what == command_t::MAX_COMMANDS-1) + { + assert(strlen(identity()) < IDENTITY_SIZE); + strcpy(id, identity()); + } + else + strcpy(id, command_names[what].c_str()); + + cmd.memif().write(addr, IDENTITY_SIZE, id); + cmd.respond(1); +} + +bcd_t::bcd_t() +{ + register_command(0, std::bind(&bcd_t::handle_read, this, _1), "read"); + register_command(1, std::bind(&bcd_t::handle_write, this, _1), "write"); +} + +void bcd_t::handle_read(command_t cmd) +{ + pending_reads.push(cmd); +} + +void bcd_t::handle_write(command_t cmd) +{ + canonical_terminal_t::write(cmd.payload()); +} + +void bcd_t::tick() +{ + int ch; + if (!pending_reads.empty() && (ch = canonical_terminal_t::read()) != -1) + { + pending_reads.front().respond(0x100 | ch); + pending_reads.pop(); + } +} + +// Open the backing file fn read/write, register the "read" and "write" +// commands, and record the file size in the identity string. +// Throws std::runtime_error if the file cannot be opened or stat'ed. +disk_t::disk_t(const char* fn) +{ + fd = ::open(fn, O_RDWR); + if (fd < 0) + throw std::runtime_error("could not open " + std::string(fn)); + + register_command(0, std::bind(&disk_t::handle_read, this, _1), "read"); + register_command(1, std::bind(&disk_t::handle_write, this, _1), "write"); + + struct stat st; + if (fstat(fd, &st) < 0) + { + // The destructor does not run when the constructor throws, so the + // descriptor must be released here or it is leaked. + close(fd); + throw std::runtime_error("could not stat " + std::string(fn)); + } + + size = st.st_size; + id = "disk size=" + std::to_string(size); +} + +disk_t::~disk_t() +{ + close(fd); +} + +void disk_t::handle_read(command_t cmd) +{ + request_t req; + cmd.memif().read(cmd.payload(), sizeof(req), &req); + + std::vector buf(req.size); + if ((size_t)::pread(fd, &buf[0], buf.size(), req.offset) != req.size) + throw std::runtime_error("could not read " + id + " @ " + std::to_string(req.offset)); + + cmd.memif().write(req.addr, buf.size(), &buf[0]); + cmd.respond(req.tag); +} + +void disk_t::handle_write(command_t cmd) +{ + request_t req; + cmd.memif().read(cmd.payload(), sizeof(req), &req); + + std::vector buf(req.size); + cmd.memif().read(req.addr, buf.size(), &buf[0]); + + if ((size_t)::pwrite(fd, &buf[0], buf.size(), req.offset) != req.size) + throw std::runtime_error("could not write " + id +
" @ " + std::to_string(req.offset)); + + cmd.respond(req.tag); +} + +device_list_t::device_list_t() + : devices(command_t::MAX_COMMANDS, &null_device), num_devices(0) +{ +} + +void device_list_t::register_device(device_t* dev) +{ + num_devices++; + assert(num_devices < command_t::MAX_DEVICES); + devices[num_devices-1] = dev; +} + +void device_list_t::handle_command(command_t cmd) +{ + devices[cmd.device()]->handle_command(cmd); +} + +void device_list_t::tick() +{ + for (size_t i = 0; i < num_devices; i++) + devices[i]->tick(); +} diff --git a/fesvr/device.h b/fesvr/device.h new file mode 100644 index 0000000000..1387b745ff --- /dev/null +++ b/fesvr/device.h @@ -0,0 +1,118 @@ +#ifndef _DEVICE_H +#define _DEVICE_H + +#include +#include +#include +#include +#include + +class memif_t; + +class command_t +{ + public: + typedef std::function callback_t; + command_t(memif_t& memif, uint64_t tohost, callback_t cb) + : _memif(memif), tohost(tohost), cb(cb) {} + + memif_t& memif() { return _memif; } + uint8_t device() { return tohost >> 56; } + uint8_t cmd() { return tohost >> 48; } + uint64_t payload() { return tohost << 16 >> 16; } + void respond(uint64_t resp) { cb((tohost >> 48 << 48) | (resp << 16 >> 16)); } + + static const size_t MAX_COMMANDS = 256; + static const size_t MAX_DEVICES = 256; + + private: + memif_t& _memif; + uint64_t tohost; + callback_t cb; +}; + +class device_t +{ + public: + device_t(); + virtual ~device_t() {} + virtual const char* identity() = 0; + virtual void tick() {} + + void handle_command(command_t cmd); + + protected: + typedef std::function command_func_t; + void register_command(size_t, command_func_t, const char*); + + private: + device_t& operator = (const device_t&); // disallow + device_t(const device_t&); // disallow + + static const size_t IDENTITY_SIZE = 64; + void handle_null_command(command_t cmd); + void handle_identify(command_t cmd); + + std::vector command_handlers; + std::vector command_names; +}; + +class bcd_t : public 
device_t +{ + public: + bcd_t(); + const char* identity() { return "bcd"; } + void tick(); + + private: + void handle_read(command_t cmd); + void handle_write(command_t cmd); + + std::queue pending_reads; +}; + +class disk_t : public device_t +{ + public: + disk_t(const char* fn); + ~disk_t(); + const char* identity() { return id.c_str(); } + + private: + struct request_t + { + uint64_t addr; + uint64_t offset; + uint64_t size; + uint64_t tag; + }; + + void handle_read(command_t cmd); + void handle_write(command_t cmd); + + std::string id; + size_t size; + int fd; +}; + +class null_device_t : public device_t +{ + public: + const char* identity() { return ""; } +}; + +class device_list_t +{ + public: + device_list_t(); + void register_device(device_t* dev); + void handle_command(command_t cmd); + void tick(); + + private: + std::vector devices; + null_device_t null_device; + size_t num_devices; +}; + +#endif diff --git a/fesvr/dtm.cc b/fesvr/dtm.cc new file mode 100644 index 0000000000..418ac63abd --- /dev/null +++ b/fesvr/dtm.cc @@ -0,0 +1,645 @@ +#include "dtm.h" +#include "debug_defines.h" +#include "encoding.h" +#include +#include +#include +#include +#include +#include + +#define RV_X(x, s, n) \ + (((x) >> (s)) & ((1 << (n)) - 1)) +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_STYPE_IMM(x) \ + ((RV_X(x, 0, 5) << 7) | (RV_X(x, 5, 7) << 25)) +#define ENCODE_SBTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_UJTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) + +#define LOAD(xlen, dst, base, imm) \ + (((xlen) == 64 ? 0x00003003 : 0x00002003) \ + | ((dst) << 7) | ((base) << 15) | (uint32_t)ENCODE_ITYPE_IMM(imm)) +#define STORE(xlen, src, base, imm) \ + (((xlen) == 64 ? 
0x00003023 : 0x00002023) \ + | ((src) << 20) | ((base) << 15) | (uint32_t)ENCODE_STYPE_IMM(imm)) +#define JUMP(there, here) (0x6f | (uint32_t)ENCODE_UJTYPE_IMM((there) - (here))) +#define BNE(r1, r2, there, here) (0x1063 | ((r1) << 15) | ((r2) << 20) | (uint32_t)ENCODE_SBTYPE_IMM((there) - (here))) +#define ADDI(dst, src, imm) (0x13 | ((dst) << 7) | ((src) << 15) | (uint32_t)ENCODE_ITYPE_IMM(imm)) +#define SRL(dst, src, sh) (0x5033 | ((dst) << 7) | ((src) << 15) | ((sh) << 20)) +#define FENCE_I 0x100f +#define EBREAK 0x00100073 +#define X0 0 +#define S0 8 +#define S1 9 + +#define AC_AR_REGNO(x) ((0x1000 | x) << AC_ACCESS_REGISTER_REGNO_OFFSET) +#define AC_AR_SIZE(x) (((x == 128)? 4 : (x == 64 ? 3 : 2)) << AC_ACCESS_REGISTER_SIZE_OFFSET) + +#define WRITE 1 +#define SET 2 +#define CLEAR 3 +#define CSRRx(type, dst, csr, src) (0x73 | ((type) << 12) | ((dst) << 7) | ((src) << 15) | (uint32_t)((csr) << 20)) + +#define get_field(reg, mask) (((reg) & (mask)) / ((mask) & ~((mask) << 1))) +#define set_field(reg, mask, val) (((reg) & ~(mask)) | (((val) * ((mask) & ~((mask) << 1))) & (mask))) + +#define RUN_AC_OR_DIE(a, b, c, d, e) { \ + uint32_t cmderr = run_abstract_command(a, b, c, d, e); \ + if (cmderr) { \ + die(cmderr); \ + } \ + } + +uint32_t dtm_t::do_command(dtm_t::req r) +{ + req_buf = r; + target->switch_to(); + assert(resp_buf.resp == 0); + return resp_buf.data; +} + +uint32_t dtm_t::read(uint32_t addr) +{ + return do_command((req){addr, 1, 0}); +} + +uint32_t dtm_t::write(uint32_t addr, uint32_t data) +{ + return do_command((req){addr, 2, data}); +} + +void dtm_t::nop() +{ + do_command((req){0, 0, 0}); +} + +void dtm_t::select_hart(int hartsel) { + int dmcontrol = read(DMI_DMCONTROL); + write (DMI_DMCONTROL, set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel)); + current_hart = hartsel; +} + +int dtm_t::enumerate_harts() { + int max_hart = (1 << DMI_DMCONTROL_HARTSEL_LENGTH) - 1; + write(DMI_DMCONTROL, set_field(read(DMI_DMCONTROL), DMI_DMCONTROL_HARTSEL, 
max_hart)); + read(DMI_DMSTATUS); + max_hart = get_field(read(DMI_DMCONTROL), DMI_DMCONTROL_HARTSEL); + + int hartsel; + for (hartsel = 0; hartsel <= max_hart; hartsel++) { + select_hart(hartsel); + int dmstatus = read(DMI_DMSTATUS); + if (get_field(dmstatus, DMI_DMSTATUS_ANYNONEXISTENT)) + break; + } + return hartsel; +} + +void dtm_t::halt(int hartsel) +{ + if (running) { + write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + } + + int dmcontrol = DMI_DMCONTROL_HALTREQ | DMI_DMCONTROL_DMACTIVE; + dmcontrol = set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel); + write(DMI_DMCONTROL, dmcontrol); + int dmstatus; + do { + dmstatus = read(DMI_DMSTATUS); + } while(get_field(dmstatus, DMI_DMSTATUS_ALLHALTED) == 0); + dmcontrol &= ~DMI_DMCONTROL_HALTREQ; + write(DMI_DMCONTROL, dmcontrol); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + current_hart = hartsel; +} + +void dtm_t::resume(int hartsel) +{ + int dmcontrol = DMI_DMCONTROL_RESUMEREQ | DMI_DMCONTROL_DMACTIVE; + dmcontrol = set_field(dmcontrol, DMI_DMCONTROL_HARTSEL, hartsel); + write(DMI_DMCONTROL, dmcontrol); + int dmstatus; + do { + dmstatus = read(DMI_DMSTATUS); + } while (get_field(dmstatus, DMI_DMSTATUS_ALLRESUMEACK) == 0); + dmcontrol &= ~DMI_DMCONTROL_RESUMEREQ; + write(DMI_DMCONTROL, dmcontrol); + // Read dmstatus to avoid back-to-back writes to dmcontrol. + read(DMI_DMSTATUS); + current_hart = hartsel; + + if (running) { + write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE); + // Read dmstatus to avoid back-to-back writes to dmcontrol. 
+ read(DMI_DMSTATUS); + } +} + +uint64_t dtm_t::save_reg(unsigned regno) +{ + uint32_t data[xlen/(8*4)]; + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | AC_AR_SIZE(xlen) | AC_AR_REGNO(regno); + RUN_AC_OR_DIE(command, 0, 0, data, xlen / (8*4)); + + uint64_t result = data[0]; + if (xlen > 32) { + result |= ((uint64_t)data[1]) << 32; + } + return result; +} + +void dtm_t::restore_reg(unsigned regno, uint64_t val) +{ + uint32_t data[xlen/(8*4)]; + data[0] = (uint32_t) val; + if (xlen > 32) { + data[1] = (uint32_t) (val >> 32); + } + + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(regno); + + RUN_AC_OR_DIE(command, 0, 0, data, xlen / (8*4)); + +} + +uint32_t dtm_t::run_abstract_command(uint32_t command, + const uint32_t program[], size_t program_n, + uint32_t data[], size_t data_n) +{ + assert(program_n <= ram_words); + assert(data_n <= data_words); + + for (size_t i = 0; i < program_n; i++) { + write(DMI_PROGBUF0 + i, program[i]); + } + + if (get_field(command, AC_ACCESS_REGISTER_WRITE) && + get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { + for (size_t i = 0; i < data_n; i++) { + write(DMI_DATA0 + i, data[i]); + } + } + + write(DMI_COMMAND, command); + + // Wait for not busy and then check for error. 
+ uint32_t abstractcs; + do { + abstractcs = read(DMI_ABSTRACTCS); + } while (abstractcs & DMI_ABSTRACTCS_BUSY); + + if ((get_field(command, AC_ACCESS_REGISTER_WRITE) == 0) && + get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { + for (size_t i = 0; i < data_n; i++){ + data[i] = read(DMI_DATA0 + i); + } + } + + return get_field(abstractcs, DMI_ABSTRACTCS_CMDERR); + +} + +size_t dtm_t::chunk_align() +{ + return xlen / 8; +} + +void dtm_t::read_chunk(uint64_t taddr, size_t len, void* dst) +{ + uint32_t prog[ram_words]; + uint32_t data[data_words]; + + uint8_t * curr = (uint8_t*) dst; + + halt(current_hart); + + uint64_t s0 = save_reg(S0); + uint64_t s1 = save_reg(S1); + + prog[0] = LOAD(xlen, S1, S0, 0); + prog[1] = ADDI(S0, S0, xlen/8); + prog[2] = EBREAK; + + data[0] = (uint32_t) taddr; + if (xlen > 32) { + data[1] = (uint32_t) (taddr >> 32); + } + + // Write s0 with the address, then execute program buffer. + // This will get S1 with the data and increment s0. + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_ACCESS_REGISTER_POSTEXEC | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S0); + + RUN_AC_OR_DIE(command, prog, 3, data, xlen/(4*8)); + + // TODO: could use autoexec here. 
+ for (size_t i = 0; i < (len * 8 / xlen); i++){ + command = AC_ACCESS_REGISTER_TRANSFER | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S1); + if ((i + 1) < (len * 8 / xlen)) { + command |= AC_ACCESS_REGISTER_POSTEXEC; + } + + RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8)); + + memcpy(curr, data, xlen/8); + curr += xlen/8; + } + + restore_reg(S0, s0); + restore_reg(S1, s1); + + resume(current_hart); + +} + +void dtm_t::write_chunk(uint64_t taddr, size_t len, const void* src) +{ + uint32_t prog[ram_words]; + uint32_t data[data_words]; + + const uint8_t * curr = (const uint8_t*) src; + + halt(current_hart); + + uint64_t s0 = save_reg(S0); + uint64_t s1 = save_reg(S1); + + prog[0] = STORE(xlen, S1, S0, 0); + prog[1] = ADDI(S0, S0, xlen/8); + prog[2] = EBREAK; + + data[0] = (uint32_t) taddr; + if (xlen > 32) { + data[1] = (uint32_t) (taddr >> 32); + } + + // Write the program (not used yet). + // Write s0 with the address. + uint32_t command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S0); + + RUN_AC_OR_DIE(command, prog, 3, data, xlen/(4*8)); + + // Use Autoexec for more than one word of transfer. + // Write S1 with data, then execution stores S1 to + // 0(S0) and increments S0. + // Each time we write XLEN bits. + memcpy(data, curr, xlen/8); + curr += xlen/8; + + command = AC_ACCESS_REGISTER_TRANSFER | + AC_ACCESS_REGISTER_POSTEXEC | + AC_ACCESS_REGISTER_WRITE | + AC_AR_SIZE(xlen) | + AC_AR_REGNO(S1); + + RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8)); + + uint32_t abstractcs; + for (size_t i = 1; i < (len * 8 / xlen); i++){ + if (i == 1) { + write(DMI_ABSTRACTAUTO, 1 << DMI_ABSTRACTAUTO_AUTOEXECDATA_OFFSET); + } + memcpy(data, curr, xlen/8); + curr += xlen/8; + if (xlen == 64) { + write(DMI_DATA0 + 1, data[1]); + } + write(DMI_DATA0, data[0]); //Triggers a command w/ autoexec. 
+
+    // Wait for the command triggered by the DATA0 write to finish, then
+    // report any error it left in cmderr.
+    do {
+      abstractcs = read(DMI_ABSTRACTCS);
+    } while (abstractcs & DMI_ABSTRACTCS_BUSY);
+    if ( get_field(abstractcs, DMI_ABSTRACTCS_CMDERR)) {
+      die(get_field(abstractcs, DMI_ABSTRACTCS_CMDERR));
+    }
+  }
+  // Autoexec is only switched on for transfers of more than one word;
+  // switch it off again in that case.
+  if ((len * 8 / xlen) > 1) {
+    write(DMI_ABSTRACTAUTO, 0);
+  }
+
+  restore_reg(S0, s0);
+  restore_reg(S1, s1);
+  resume(current_hart);
+}
+
+// Report a failed abstract command and clear the error field.
+//
+// cmderr is the value of the ABSTRACTCS cmderr field. Despite the name this
+// function returns: it prints the error, writes the CMDERR mask back to
+// ABSTRACTCS (cmderr is write-1-to-clear in the RISC-V debug spec) and lets
+// the simulation carry on.
+void dtm_t::die(uint32_t cmderr)
+{
+  const char * codes[] = {
+    "OK",
+    "BUSY",
+    "NOT_SUPPORTED",
+    "EXCEPTION",
+    "HALT/RESUME"
+  };
+  const char * msg;
+  if (cmderr < (sizeof(codes) / sizeof(*codes))){
+    msg = codes[cmderr];
+  } else {
+    msg = "OTHER";
+  }
+  //throw std::runtime_error("Debug Abstract Command Error #" + std::to_string(cmderr) + "(" + msg + ")");
+  // Each message gets its own line; without the newlines the two reports
+  // run together on the terminal.
+  printf("ERROR: %s:%d, Debug Abstract Command Error #%d (%s)\n", __FILE__, __LINE__, cmderr, msg);
+  printf("ERROR: %s:%d, Should die, but allowing simulation to continue and fail.\n", __FILE__, __LINE__);
+  write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR);
+
+}
+
+// Zero `len` bytes of target memory starting at `taddr` by running a small
+// store loop from the program buffer on the current hart. S0 and S1 are
+// saved before and restored after, and the hart is halted for the duration.
+void dtm_t::clear_chunk(uint64_t taddr, size_t len)
+{
+  // The loop below stores first and compares S0 against S1 afterwards, so
+  // with an empty range S0 would step past S1 and never match it.
+  if (len == 0)
+    return;
+
+  // Fixed two-word buffer, like adata in modify_csr: this function always
+  // fills data[0] and data[1], so it must not depend on data_words.
+  uint32_t data[2];
+
+  halt(current_hart);
+  uint64_t s0 = save_reg(S0);
+  uint64_t s1 = save_reg(S1);
+
+  uint32_t command;
+
+  // S0 = Addr
+  data[0] = (uint32_t) taddr;
+  data[1] = (uint32_t) (taddr >> 32);
+  command = AC_ACCESS_REGISTER_TRANSFER |
+    AC_ACCESS_REGISTER_WRITE |
+    AC_AR_SIZE(xlen) |
+    AC_AR_REGNO(S0);
+  RUN_AC_OR_DIE(command, 0, 0, data, xlen/(4*8));
+
+  // S1 = Addr + len, loop until S0 = S1
+  // Built as an initialised array (as modify_csr and fence_i do) so its
+  // four words exist whatever ram_words is.
+  const uint32_t prog[] = {
+    STORE(xlen, X0, S0, 0),
+    ADDI(S0, S0, xlen/8),
+    BNE(S0, S1, 0*4, 2*4),
+    EBREAK
+  };
+
+  data[0] = (uint32_t) (taddr + len);
+  data[1] = (uint32_t) ((taddr + len) >> 32);
+  command = AC_ACCESS_REGISTER_TRANSFER |
+    AC_ACCESS_REGISTER_WRITE |
+    AC_AR_SIZE(xlen) |
+    AC_AR_REGNO(S1) |
+    AC_ACCESS_REGISTER_POSTEXEC;
+  RUN_AC_OR_DIE(command, prog, sizeof(prog) / sizeof(*prog), data, xlen/(4*8));
+
+  restore_reg(S0, s0);
+  restore_reg(S1, s1);
+
+  resume(current_hart);
+}
+
+uint64_t
dtm_t::write_csr(unsigned which, uint64_t data)
+{
+  // Thin wrapper: modify_csr with the WRITE operation.
+  return modify_csr(which, data, WRITE);
+}
+
+// Thin wrapper: modify_csr with the SET operation.
+uint64_t dtm_t::set_csr(unsigned which, uint64_t data)
+{
+  return modify_csr(which, data, SET);
+}
+
+// Thin wrapper: modify_csr with the CLEAR operation.
+uint64_t dtm_t::clear_csr(unsigned which, uint64_t data)
+{
+  return modify_csr(which, data, CLEAR);
+}
+
+// Read CSR `which` by performing a SET with an all-zero operand.
+uint64_t dtm_t::read_csr(unsigned which)
+{
+  return set_csr(which, 0);
+}
+
+// Run a CSR instruction of kind `type` (WRITE, SET or CLEAR, as passed by
+// the wrappers above) against CSR `which` on the current hart, with `data`
+// as its operand. Returns the value the program stores back to the data
+// registers -- presumably the CSR's previous contents, as for
+// csrrw/csrrs/csrrc; the CSRRx macro is defined elsewhere.
+// The hart is halted for the duration and resumed before returning.
+uint64_t dtm_t::modify_csr(unsigned which, uint64_t data, uint32_t type)
+{
+  halt(current_hart);
+
+  // This code just uses DSCRATCH to save S0
+  // and data_base to do the transfer so we don't
+  // need to run more commands to save and restore
+  // S0.
+  uint32_t prog[] = {
+    CSRRx(WRITE, S0, CSR_DSCRATCH0, S0),
+    LOAD(xlen, S0, X0, data_base),
+    CSRRx(type, S0, which, S0),
+    STORE(xlen, S0, X0, data_base),
+    CSRRx(WRITE, S0, CSR_DSCRATCH0, S0),
+    EBREAK
+  };
+
+  //TODO: Use transfer = 0. For now both HW and OpenOCD
+  // ignore transfer bit, so use "store to X0" NOOP.
+  // We sort of need this anyway because run_abstract_command
+  // needs the DATA to be written so may as well use the WRITE flag.
+
+  uint32_t adata[] = {(uint32_t) data,
+                      (uint32_t) (data >> 32)};
+
+  uint32_t command = AC_ACCESS_REGISTER_POSTEXEC |
+    AC_ACCESS_REGISTER_TRANSFER |
+    AC_ACCESS_REGISTER_WRITE |
+    AC_AR_SIZE(xlen) |
+    AC_AR_REGNO(X0);
+
+  RUN_AC_OR_DIE(command, prog, sizeof(prog) / sizeof(*prog), adata, xlen/(4*8));
+
+  // DATA0 holds the low 32 bits and DATA0 + 1 the high 32 bits (the same
+  // split used for adata above), so the second word must be shifted into
+  // the upper half before it is merged in.
+  uint64_t res = read(DMI_DATA0);//adata[0];
+  if (xlen == 64)
+    res |= ((uint64_t) read(DMI_DATA0 + 1)) << 32;
+
+  resume(current_hart);
+  return res;
+}
+
+// Largest transfer, in bytes, handed to read_chunk/write_chunk at once.
+size_t dtm_t::chunk_max_size()
+{
+  // Arbitrary choice. 4k Page size seems reasonable.
+  return 4096;
+}
+
+uint32_t dtm_t::get_xlen()
+{
+  // Attempt to read S0 to find out what size it is.
+  // You could also attempt to run code, but you need to save registers
+  // to do that anyway. If what you really want to do is figure out
+  // the size of S0 so you can save it later, then do that.
+  uint32_t command = AC_ACCESS_REGISTER_TRANSFER | AC_AR_REGNO(S0);
+  uint32_t cmderr;
+
+  // These probes supply no program-buffer words and no data words (both
+  // counts are 0), so pass null pointers, as fence_i and read_chunk already
+  // do for the buffer they do not use. Zero-length arrays are not valid
+  // ISO C++.
+  //
+  // Try the widest access first. A successful 128-bit read means a target
+  // this code cannot drive, so it is reported as an error.
+  cmderr = run_abstract_command(command | AC_AR_SIZE(128), nullptr, 0, nullptr, 0);
+  if (cmderr == 0){
+    throw std::runtime_error("FESVR DTM Does not support 128-bit");
+  }
+  // Clear the error left by the failed probe before trying the next size.
+  write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR);
+
+  cmderr = run_abstract_command(command | AC_AR_SIZE(64), nullptr, 0, nullptr, 0);
+  if (cmderr == 0){
+    return 64;
+  }
+  write(DMI_ABSTRACTCS, DMI_ABSTRACTCS_CMDERR);
+
+  cmderr = run_abstract_command(command | AC_AR_SIZE(32), nullptr, 0, nullptr, 0);
+  if (cmderr == 0){
+    return 32;
+  }
+
+  throw std::runtime_error("FESVR DTM can't determine XLEN. Aborting");
+}
+
+// Execute FENCE.I on the current hart from the program buffer. The hart is
+// halted first and resumed afterwards.
+void dtm_t::fence_i()
+{
+  halt(current_hart);
+
+  const uint32_t prog[] = {
+    FENCE_I,
+    EBREAK
+  };
+
+  //TODO: Use the transfer = 0.
+  uint32_t command = AC_ACCESS_REGISTER_POSTEXEC |
+    AC_ACCESS_REGISTER_TRANSFER |
+    AC_ACCESS_REGISTER_WRITE |
+    AC_AR_SIZE(xlen) |
+    AC_AR_REGNO(X0);
+
+  RUN_AC_OR_DIE(command, prog, sizeof(prog)/sizeof(*prog), 0, 0);
+
+  resume(current_hart);
+
+}
+
+// Entry point given to the host context in start_host_thread; `arg` is the
+// dtm_t whose producer_thread() should run.
+void host_thread_main(void* arg)
+{
+  ((dtm_t*)arg)->producer_thread();
+}
+
+// For every hart: run fence_i and point it at the program's entry address.
+// Hart 0 is left selected afterwards.
+void dtm_t::reset()
+{
+  for (int hartsel = 0; hartsel < num_harts; hartsel ++ ){
+    select_hart(hartsel);
+    // this command also does a halt and resume
+    fence_i();
+    // after this command, the hart will run from _start.
+    // (0x7b1 is the address of the dpc CSR in the RISC-V debug spec.)
+    write_csr(0x7b1, get_entry_point());
+  }
+  // In theory any hart can handle the memory accesses,
+  // this will enforce that hart 0 handles them.
+  select_hart(0);
+  read(DMI_DMSTATUS);
+}
+
+// Issue max_idle_cycles nop() requests.
+void dtm_t::idle()
+{
+  for (int idle_cycles = 0; idle_cycles < max_idle_cycles; idle_cycles++)
+    nop();
+}
+
+void dtm_t::producer_thread()
+{
+  // Learn about the Debug Module and assert things we
+  // depend on in this code.
+
+  // Enable the debugger.
+  write(DMI_DMCONTROL, DMI_DMCONTROL_DMACTIVE);
+  // Poll until the debugger agrees it's enabled.
+ while ((read(DMI_DMCONTROL) & DMI_DMCONTROL_DMACTIVE) == 0) ; + + // These are checked every time we run an abstract command. + uint32_t abstractcs = read(DMI_ABSTRACTCS); + ram_words = get_field(abstractcs, DMI_ABSTRACTCS_PROGSIZE); + data_words = get_field(abstractcs, DMI_ABSTRACTCS_DATACOUNT); + + // These things are only needed for the 'modify_csr' function. + // That could be re-written to not use these at some performance + // overhead. + uint32_t hartinfo = read(DMI_HARTINFO); + assert(get_field(hartinfo, DMI_HARTINFO_NSCRATCH) > 0); + assert(get_field(hartinfo, DMI_HARTINFO_DATAACCESS)); + + data_base = get_field(hartinfo, DMI_HARTINFO_DATAADDR); + + num_harts = enumerate_harts(); + halt(0); + // Note: We don't support systems with heterogeneous XLEN. + // It's possible to do this at the cost of extra cycles. + xlen = get_xlen(); + resume(0); + + running = true; + + htif_t::run(); + + while (true) + nop(); +} + +void dtm_t::start_host_thread() +{ + req_wait = false; + resp_wait = false; + + target = context_t::current(); + host.init(host_thread_main, this); + host.switch_to(); +} + +dtm_t::dtm_t(int argc, char** argv) + : htif_t(argc, argv), running(false) +{ + start_host_thread(); +} + +dtm_t::~dtm_t() +{ +} + +void dtm_t::tick( + bool req_ready, + bool resp_valid, + resp resp_bits) +{ + if (!resp_wait) { + if (!req_wait) { + req_wait = true; + } else if (req_ready) { + req_wait = false; + resp_wait = true; + } + } + + if (resp_valid) { + assert(resp_wait); + resp_wait = false; + + resp_buf = resp_bits; + // update the target with the current context + target = context_t::current(); + host.switch_to(); + } +} + +void dtm_t::return_resp(resp resp_bits){ + resp_buf = resp_bits; + target = context_t::current(); + host.switch_to(); +} diff --git a/fesvr/dtm.h b/fesvr/dtm.h new file mode 100644 index 0000000000..fbf161efec --- /dev/null +++ b/fesvr/dtm.h @@ -0,0 +1,115 @@ +#ifndef _ROCKET_DTM_H +#define _ROCKET_DTM_H + +#include "htif.h" +#include "context.h" 
+#include +#include +#include +#include +#include +#include + +// abstract debug transport module +class dtm_t : public htif_t +{ + public: + dtm_t(int argc, char**argv); + ~dtm_t(); + + struct req { + uint32_t addr; + uint32_t op; + uint32_t data; + }; + + struct resp { + uint32_t resp; + uint32_t data; + }; + + void tick( + bool req_ready, + bool resp_valid, + resp resp_bits + ); + // Akin to tick, but the target thread returns a response on every invocation + void return_resp( + resp resp_bits + ); + + + bool req_valid() { return req_wait; } + req req_bits() { return req_buf; } + bool resp_ready() { return true; } + + uint32_t read(uint32_t addr); + uint32_t write(uint32_t addr, uint32_t data); + void nop(); + + uint64_t read_csr(unsigned which); + uint64_t write_csr(unsigned which, uint64_t data); + uint64_t clear_csr(unsigned which, uint64_t data); + uint64_t set_csr(unsigned which, uint64_t data); + void fence_i(); + + void producer_thread(); + + protected: + virtual void read_chunk(addr_t taddr, size_t len, void* dst) override; + virtual void write_chunk(addr_t taddr, size_t len, const void* src) override; + virtual void clear_chunk(addr_t taddr, size_t len) override; + virtual size_t chunk_align() override; + virtual size_t chunk_max_size() override; + virtual void reset() override; + virtual void idle() override; + + private: + context_t host; + context_t* target; + pthread_t producer; + sem_t req_produce; + sem_t req_consume; + sem_t resp_produce; + sem_t resp_consume; + req req_buf; + resp resp_buf; + bool running; + + uint32_t run_abstract_command(uint32_t command, const uint32_t program[], size_t program_n, + uint32_t data[], size_t data_n); + + void die(uint32_t cmderr); + void halt(int); + int enumerate_harts(); + void select_hart(int); + void resume(int); + uint64_t save_reg(unsigned regno); + void restore_reg(unsigned regno, uint64_t val); + + uint64_t modify_csr(unsigned which, uint64_t data, uint32_t type); + + bool req_wait; + bool resp_wait; + 
uint32_t data_base; + + uint32_t xlen; + + static const int max_idle_cycles = 10000; + + size_t ram_words; + size_t data_words; + int num_harts; + int current_hart; + + uint32_t get_xlen(); + uint32_t do_command(dtm_t::req r); + + void parse_args(const std::vector& args); + void register_devices(); + void start_host_thread(); + + friend class memif_t; +}; + +#endif diff --git a/fesvr/dummy.cc b/fesvr/dummy.cc new file mode 100644 index 0000000000..a155d3e56c --- /dev/null +++ b/fesvr/dummy.cc @@ -0,0 +1,4 @@ +// See LICENSE for license details. + +// help out poor, C-centric autoconf +extern "C" void libfesvr_is_present() {} diff --git a/fesvr/elf.h b/fesvr/elf.h new file mode 100644 index 0000000000..a213832755 --- /dev/null +++ b/fesvr/elf.h @@ -0,0 +1,132 @@ +// See LICENSE for details. + +#ifndef _ELF_H +#define _ELF_H + +#include + +#define ET_EXEC 2 +#define EM_RISCV 243 +#define EM_NONE 0 +#define EV_CURRENT 1 + +#define IS_ELF(hdr) \ + ((hdr).e_ident[0] == 0x7f && (hdr).e_ident[1] == 'E' && \ + (hdr).e_ident[2] == 'L' && (hdr).e_ident[3] == 'F') + +#define IS_ELF32(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 1) +#define IS_ELF64(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 2) +#define IS_ELFLE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 1) +#define IS_ELFBE(hdr) (IS_ELF(hdr) && (hdr).e_ident[5] == 2) +#define IS_ELF_EXEC(hdr) (IS_ELF(hdr) && (hdr).e_type == ET_EXEC) +#define IS_ELF_RISCV(hdr) (IS_ELF(hdr) && (hdr).e_machine == EM_RISCV) +#define IS_ELF_EM_NONE(hdr) (IS_ELF(hdr) && (hdr).e_machine == EM_NONE) +#define IS_ELF_VCURRENT(hdr) (IS_ELF(hdr) && (hdr).e_version == EV_CURRENT) + +#define PT_LOAD 1 + +#define SHT_NOBITS 8 + +typedef struct { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf32_Ehdr; + +typedef 
struct { + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf32_Shdr; + +typedef struct +{ + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} Elf32_Phdr; + +typedef struct +{ + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + uint8_t st_info; + uint8_t st_other; + uint16_t st_shndx; +} Elf32_Sym; + +typedef struct { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf64_Ehdr; + +typedef struct { + uint32_t sh_name; + uint32_t sh_type; + uint64_t sh_flags; + uint64_t sh_addr; + uint64_t sh_offset; + uint64_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint64_t sh_addralign; + uint64_t sh_entsize; +} Elf64_Shdr; + +typedef struct { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} Elf64_Phdr; + +typedef struct { + uint32_t st_name; + uint8_t st_info; + uint8_t st_other; + uint16_t st_shndx; + uint64_t st_value; + uint64_t st_size; +} Elf64_Sym; + +#endif diff --git a/fesvr/elf2hex.cc b/fesvr/elf2hex.cc new file mode 100644 index 0000000000..327cf2d933 --- /dev/null +++ b/fesvr/elf2hex.cc @@ -0,0 +1,47 @@ +// See LICENSE for license details. 
+
+#include <iostream>
+#include "htif_hexwriter.h"
+#include "memif.h"
+#include "elfloader.h"
+
+// elf2hex: load an ELF file into a hex-writer memory image and print the
+// image on stdout.
+//
+// Arguments: argv[1] = row width in bytes (power of 2),
+//            argv[2] = number of rows (power of 2),
+//            argv[3] = path of the ELF file,
+//            argv[4] = optional base address (multiple of width, default 0).
+// Returns 0 on success and 1 on a usage or argument error.
+int main(int argc, char** argv)
+{
+  if(argc < 4 || argc > 5)
+  {
+    // Name every positional argument the code below actually reads.
+    std::cerr << "Usage: " << argv[0] << " <width> <depth> <elf_file> [base]" << std::endl;
+    return 1;
+  }
+
+  unsigned width = atoi(argv[1]);
+  // x & (x-1) is zero only for powers of two (and for 0, rejected separately).
+  if(width == 0 || (width & (width-1)))
+  {
+    std::cerr << "width must be a power of 2" << std::endl;
+    return 1;
+  }
+
+  unsigned long long int base = 0;
+  if(argc==5) {
+    base = atoll(argv[4]);
+    // width is a power of two here, so this tests base % width != 0.
+    if(base & (width-1))
+    {
+      std::cerr << "base must be divisible by width" << std::endl;
+      return 1;
+    }
+  }
+
+  unsigned depth = atoi(argv[2]);
+  if(depth == 0 || (depth & (depth-1)))
+  {
+    std::cerr << "depth must be a power of 2" << std::endl;
+    return 1;
+  }
+
+  htif_hexwriter_t htif(base, width, depth);
+  memif_t memif(&htif);
+  reg_t entry;
+  load_elf(argv[3], &memif, &entry);
+  std::cout << htif;
+
+  return 0;
+}
diff --git a/fesvr/elfloader.cc b/fesvr/elfloader.cc
new file mode 100644
index 0000000000..a4bae1e7d7
--- /dev/null
+++ b/fesvr/elfloader.cc
@@ -0,0 +1,94 @@
+// See LICENSE for license details.
+ +#include "elf.h" +#include "memif.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +std::map load_elf(const char* fn, memif_t* memif, reg_t* entry) +{ + int fd = open(fn, O_RDONLY); + struct stat s; + assert(fd != -1); + if (fstat(fd, &s) < 0) + abort(); + size_t size = s.st_size; + + char* buf = (char*)mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + assert(buf != MAP_FAILED); + close(fd); + + assert(size >= sizeof(Elf64_Ehdr)); + const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf; + assert(IS_ELF32(*eh64) || IS_ELF64(*eh64)); + assert(IS_ELFLE(*eh64)); + assert(IS_ELF_EXEC(*eh64)); + assert(IS_ELF_RISCV(*eh64) || IS_ELF_EM_NONE(*eh64)); + assert(IS_ELF_VCURRENT(*eh64)); + + std::vector zeros; + std::map symbols; + + #define LOAD_ELF(ehdr_t, phdr_t, shdr_t, sym_t, bswap) do { \ + ehdr_t* eh = (ehdr_t*)buf; \ + phdr_t* ph = (phdr_t*)(buf + bswap(eh->e_phoff)); \ + *entry = bswap(eh->e_entry); \ + assert(size >= bswap(eh->e_phoff) + bswap(eh->e_phnum)*sizeof(*ph)); \ + for (unsigned i = 0; i < bswap(eh->e_phnum); i++) { \ + if(bswap(ph[i].p_type) == PT_LOAD && bswap(ph[i].p_memsz)) { \ + if (bswap(ph[i].p_filesz)) { \ + assert(size >= bswap(ph[i].p_offset) + bswap(ph[i].p_filesz)); \ + memif->write(bswap(ph[i].p_paddr), bswap(ph[i].p_filesz), (uint8_t*)buf + bswap(ph[i].p_offset)); \ + } \ + zeros.resize(bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz)); \ + memif->write(bswap(ph[i].p_paddr) + bswap(ph[i].p_filesz), bswap(ph[i].p_memsz) - bswap(ph[i].p_filesz), &zeros[0]); \ + } \ + } \ + shdr_t* sh = (shdr_t*)(buf + bswap(eh->e_shoff)); \ + assert(size >= bswap(eh->e_shoff) + bswap(eh->e_shnum)*sizeof(*sh)); \ + assert(bswap(eh->e_shstrndx) < bswap(eh->e_shnum)); \ + assert(size >= bswap(sh[bswap(eh->e_shstrndx)].sh_offset) + bswap(sh[bswap(eh->e_shstrndx)].sh_size)); \ + char *shstrtab = buf + bswap(sh[bswap(eh->e_shstrndx)].sh_offset); \ + unsigned strtabidx = 0, symtabidx = 
0; \ + for (unsigned i = 0; i < bswap(eh->e_shnum); i++) { \ + unsigned max_len = bswap(sh[bswap(eh->e_shstrndx)].sh_size) - bswap(sh[i].sh_name); \ + assert(bswap(sh[i].sh_name) < bswap(sh[bswap(eh->e_shstrndx)].sh_size)); \ + assert(strnlen(shstrtab + bswap(sh[i].sh_name), max_len) < max_len); \ + if (bswap(sh[i].sh_type) & SHT_NOBITS) continue; \ + assert(size >= bswap(sh[i].sh_offset) + bswap(sh[i].sh_size)); \ + if (strcmp(shstrtab + bswap(sh[i].sh_name), ".strtab") == 0) \ + strtabidx = i; \ + if (strcmp(shstrtab + bswap(sh[i].sh_name), ".symtab") == 0) \ + symtabidx = i; \ + } \ + if (strtabidx && symtabidx) { \ + char* strtab = buf + bswap(sh[strtabidx].sh_offset); \ + sym_t* sym = (sym_t*)(buf + bswap(sh[symtabidx].sh_offset)); \ + for (unsigned i = 0; i < bswap(sh[symtabidx].sh_size)/sizeof(sym_t); i++) { \ + unsigned max_len = bswap(sh[strtabidx].sh_size) - bswap(sym[i].st_name); \ + assert(bswap(sym[i].st_name) < bswap(sh[strtabidx].sh_size)); \ + assert(strnlen(strtab + bswap(sym[i].st_name), max_len) < max_len); \ + symbols[strtab + bswap(sym[i].st_name)] = bswap(sym[i].st_value); \ + } \ + } \ + } while(0) + + if (IS_ELF32(*eh64)) + LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym, from_le); + else + LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym, from_le); + + munmap(buf, size); + + return symbols; +} diff --git a/fesvr/elfloader.h b/fesvr/elfloader.h new file mode 100644 index 0000000000..696ef47849 --- /dev/null +++ b/fesvr/elfloader.h @@ -0,0 +1,13 @@ +// See LICENSE for license details. 
+ +#ifndef _ELFLOADER_H +#define _ELFLOADER_H + +#include "elf.h" +#include +#include + +class memif_t; +std::map load_elf(const char* fn, memif_t* memif, reg_t* entry); + +#endif diff --git a/fesvr/fesvr.ac b/fesvr/fesvr.ac new file mode 100644 index 0000000000..60e6c57fc3 --- /dev/null +++ b/fesvr/fesvr.ac @@ -0,0 +1 @@ +AC_CHECK_LIB(pthread, pthread_create, [], [AC_MSG_ERROR([libpthread is required])]) diff --git a/fesvr/fesvr.mk.in b/fesvr/fesvr.mk.in new file mode 100644 index 0000000000..30c8bfeb94 --- /dev/null +++ b/fesvr/fesvr.mk.in @@ -0,0 +1,40 @@ +fesvr_hdrs = \ + elf.h \ + elfloader.h \ + htif.h \ + dtm.h \ + memif.h \ + syscall.h \ + context.h \ + htif_pthread.h \ + htif_hexwriter.h \ + option_parser.h \ + term.h \ + device.h \ + rfb.h \ + tsi.h \ + +fesvr_CFLAGS = -fPIC + +fesvr_install_hdrs = $(fesvr_hdrs) + +fesvr_install_lib = yes + +fesvr_srcs = \ + elfloader.cc \ + htif.cc \ + memif.cc \ + dtm.cc \ + syscall.cc \ + device.cc \ + rfb.cc \ + context.cc \ + htif_pthread.cc \ + htif_hexwriter.cc \ + dummy.cc \ + option_parser.cc \ + term.cc \ + tsi.cc \ + +fesvr_install_prog_srcs = \ + elf2hex.cc \ diff --git a/fesvr/fesvr.pc.in b/fesvr/fesvr.pc.in new file mode 100644 index 0000000000..f2d12563b4 --- /dev/null +++ b/fesvr/fesvr.pc.in @@ -0,0 +1,26 @@ +#========================================================================= +# Modular C++ Build System Subproject Package Config +#========================================================================= +# Please read the documenation in 'mcppbs-uguide.txt' for more details +# on how the Modular C++ Build System works. 
+ +#------------------------------------------------------------------------- +# Generic variables +#------------------------------------------------------------------------- + +prefix=@prefix@ +include_dir=${prefix}/include/fesvr +lib_dir=${prefix}/lib + +#------------------------------------------------------------------------- +# Keywords +#------------------------------------------------------------------------- + +Name : fesvr +Version : @PACKAGE_VERSION@ +Description : Frontend Server C/C++ API +Requires : @fesvr_pkcdeps@ +Cflags : -I${include_dir} @CPPFLAGS@ @fesvr_extra_cppflags@ +Libs : -L${lib_dir} @LDFLAGS@ @fesvr_extra_ldflags@ \ + -lfesvr @fesvr_extra_libs@ + diff --git a/fesvr/htif.cc b/fesvr/htif.cc new file mode 100644 index 0000000000..f828494654 --- /dev/null +++ b/fesvr/htif.cc @@ -0,0 +1,371 @@ +// See LICENSE for license details. + +#include "htif.h" +#include "rfb.h" +#include "elfloader.h" +#include "encoding.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Attempt to determine the execution prefix automatically. autoconf + * sets PREFIX, and pconfigure sets __PCONFIGURE__PREFIX. */ +#if !defined(PREFIX) && defined(__PCONFIGURE__PREFIX) +# define PREFIX __PCONFIGURE__PREFIX +#endif + +#ifndef TARGET_ARCH +# define TARGET_ARCH "riscv64-unknown-elf" +#endif + +#ifndef TARGET_DIR +# define TARGET_DIR "/" TARGET_ARCH "/bin/" +#endif + +static volatile bool signal_exit = false; +static void handle_signal(int sig) +{ + if (sig == SIGABRT || signal_exit) // someone set up us the bomb! 
+ exit(-1); + signal_exit = true; + signal(sig, &handle_signal); +} + +htif_t::htif_t() + : mem(this), entry(DRAM_BASE), sig_addr(0), sig_len(0), + tohost_addr(0), fromhost_addr(0), exitcode(0), stopped(false), + syscall_proxy(this) +{ + signal(SIGINT, &handle_signal); + signal(SIGTERM, &handle_signal); + signal(SIGABRT, &handle_signal); // we still want to call static destructors +} + +htif_t::htif_t(int argc, char** argv) : htif_t() +{ + parse_arguments(argc, argv); + register_devices(); +} + +htif_t::htif_t(const std::vector& args) : htif_t() +{ + int argc = args.size() + 1; + char * argv[argc]; + argv[0] = (char *) "htif"; + for (unsigned int i = 0; i < args.size(); i++) { + argv[i+1] = (char *) args[i].c_str(); + } + + parse_arguments(argc, argv); + register_devices(); +} + +htif_t::~htif_t() +{ + for (auto d : dynamic_devices) + delete d; +} + +void htif_t::start() +{ + if (!targs.empty() && targs[0] != "none") + load_program(); + + reset(); +} + +std::map htif_t::load_payload(const std::string& payload, reg_t* entry) +{ + std::string path; + if (access(payload.c_str(), F_OK) == 0) + path = payload; + else if (payload.find('/') == std::string::npos) + { + std::string test_path = PREFIX TARGET_DIR + payload; + if (access(test_path.c_str(), F_OK) == 0) + path = test_path; + } + + if (path.empty()) + throw std::runtime_error( + "could not open " + payload + + " (did you misspell it? 
If VCS, did you forget +permissive/+permissive-off?)"); + + // temporarily construct a memory interface that skips writing bytes + // that have already been preloaded through a sideband + class preload_aware_memif_t : public memif_t { + public: + preload_aware_memif_t(htif_t* htif) : memif_t(htif), htif(htif) {} + + void write(addr_t taddr, size_t len, const void* src) override + { + if (!htif->is_address_preloaded(taddr, len)) + memif_t::write(taddr, len, src); + } + + private: + htif_t* htif; + } preload_aware_memif(this); + + return load_elf(path.c_str(), &preload_aware_memif, entry); +} + +void htif_t::load_program() +{ + std::map symbols = load_payload(targs[0], &entry); + + if (symbols.count("tohost") && symbols.count("fromhost")) { + tohost_addr = symbols["tohost"]; + fromhost_addr = symbols["fromhost"]; + } else { + fprintf(stderr, "warning: tohost and fromhost symbols not in ELF; can't communicate with target\n"); + } + + // detect torture tests so we can print the memory signature at the end + if (symbols.count("begin_signature") && symbols.count("end_signature")) + { + sig_addr = symbols["begin_signature"]; + sig_len = symbols["end_signature"] - sig_addr; + } + + for (auto payload : payloads) + { + reg_t dummy_entry; + load_payload(payload, &dummy_entry); + } + + for (auto i : symbols) + { + auto it = addr2symbol.find(i.second); + if ( it == addr2symbol.end()) + addr2symbol[i.second] = i.first; + } + + return; +} + +const char* htif_t::get_symbol(uint64_t addr) +{ + auto it = addr2symbol.find(addr); + + if(it == addr2symbol.end()) + return nullptr; + + return it->second.c_str(); +} + +void htif_t::stop() +{ + if (!sig_file.empty() && sig_len) // print final torture test signature + { + std::vector buf(sig_len); + mem.read(sig_addr, sig_len, &buf[0]); + + std::ofstream sigs(sig_file); + assert(sigs && "can't open signature file!"); + sigs << std::setfill('0') << std::hex; + + const addr_t incr = 16; + assert(sig_len % incr == 0); + for (addr_t i = 0; i < 
sig_len; i += incr) + { + for (addr_t j = incr; j > 0; j--) + sigs << std::setw(2) << (uint16_t)buf[i+j-1]; + sigs << '\n'; + } + + sigs.close(); + } + + stopped = true; +} + +void htif_t::clear_chunk(addr_t taddr, size_t len) +{ + char zeros[chunk_max_size()]; + memset(zeros, 0, chunk_max_size()); + + for (size_t pos = 0; pos < len; pos += chunk_max_size()) + write_chunk(taddr + pos, std::min(len - pos, chunk_max_size()), zeros); +} + +int htif_t::run() +{ + start(); + + auto enq_func = [](std::queue* q, uint64_t x) { q->push(x); }; + std::queue fromhost_queue; + std::function fromhost_callback = + std::bind(enq_func, &fromhost_queue, std::placeholders::_1); + + if (tohost_addr == 0) { + while (true) + idle(); + } + + while (!signal_exit && exitcode == 0) + { + if (auto tohost = from_le(mem.read_uint64(tohost_addr))) { + mem.write_uint64(tohost_addr, 0); + command_t cmd(mem, tohost, fromhost_callback); + device_list.handle_command(cmd); + } else { + idle(); + } + + device_list.tick(); + + if (!fromhost_queue.empty() && mem.read_uint64(fromhost_addr) == 0) { + mem.write_uint64(fromhost_addr, to_le(fromhost_queue.front())); + fromhost_queue.pop(); + } + } + + stop(); + + return exit_code(); +} + +bool htif_t::done() +{ + return stopped; +} + +int htif_t::exit_code() +{ + return exitcode >> 1; +} + +void htif_t::parse_arguments(int argc, char ** argv) +{ + optind = 0; // reset optind as HTIF may run getopt _after_ others + while (1) { + static struct option long_options[] = { HTIF_LONG_OPTIONS }; + int option_index = 0; + int c = getopt_long(argc, argv, "-h", long_options, &option_index); + + if (c == -1) break; + retry: + switch (c) { + case 'h': usage(argv[0]); + throw std::invalid_argument("User queried htif_t help text"); + case HTIF_LONG_OPTIONS_OPTIND: + if (optarg) dynamic_devices.push_back(new rfb_t(atoi(optarg))); + else dynamic_devices.push_back(new rfb_t); + break; + case HTIF_LONG_OPTIONS_OPTIND + 1: + // [TODO] Remove once disks are supported again + 
throw std::invalid_argument("--disk/+disk unsupported (use a ramdisk)"); + dynamic_devices.push_back(new disk_t(optarg)); + break; + case HTIF_LONG_OPTIONS_OPTIND + 2: + sig_file = optarg; + break; + case HTIF_LONG_OPTIONS_OPTIND + 3: + syscall_proxy.set_chroot(optarg); + break; + case HTIF_LONG_OPTIONS_OPTIND + 4: + payloads.push_back(optarg); + break; + case '?': + if (!opterr) + break; + throw std::invalid_argument("Unknown argument (did you mean to enable +permissive parsing?)"); + case 1: { + std::string arg = optarg; + if (arg == "+h" || arg == "+help") { + c = 'h'; + optarg = nullptr; + } + else if (arg == "+rfb") { + c = HTIF_LONG_OPTIONS_OPTIND; + optarg = nullptr; + } + else if (arg.find("+rfb=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND; + optarg = optarg + 5; + } + else if (arg.find("+disk=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 1; + optarg = optarg + 6; + } + else if (arg.find("+signature=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 2; + optarg = optarg + 11; + } + else if (arg.find("+chroot=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 3; + optarg = optarg + 8; + } + else if (arg.find("+payload=") == 0) { + c = HTIF_LONG_OPTIONS_OPTIND + 4; + optarg = optarg + 9; + } + else if (arg.find("+permissive-off") == 0) { + if (opterr) + throw std::invalid_argument("Found +permissive-off when not parsing permissively"); + opterr = 1; + break; + } + else if (arg.find("+permissive") == 0) { + if (!opterr) + throw std::invalid_argument("Found +permissive when already parsing permissively"); + opterr = 0; + break; + } + else { + if (!opterr) + break; + else { + optind--; + goto done_processing; + } + } + goto retry; + } + } + } + +done_processing: + while (optind < argc) + targs.push_back(argv[optind++]); + if (!targs.size()) { + usage(argv[0]); + throw std::invalid_argument("No binary specified (Did you forget it? 
Did you forget '+permissive-off' if running with +permissive?)"); + } +} + +void htif_t::register_devices() +{ + device_list.register_device(&syscall_proxy); + device_list.register_device(&bcd); + for (auto d : dynamic_devices) + device_list.register_device(d); +} + +void htif_t::usage(const char * program_name) +{ + printf("Usage: %s [EMULATOR OPTION]... [VERILOG PLUSARG]... [HOST OPTION]... BINARY [TARGET OPTION]...\n ", + program_name); + fputs("\ +Run a BINARY on the Rocket Chip emulator.\n\ +\n\ +Mandatory arguments to long options are mandatory for short options too.\n\ +\n\ +EMULATOR OPTIONS\n\ + Consult emulator.cc if using Verilator or VCS documentation if using VCS\n\ + for available options.\n\ +EMUALTOR VERILOG PLUSARGS\n\ + Consult generated-src*/*.plusArgs for available options\n\ +", stdout); + fputs("\n" HTIF_USAGE_OPTIONS, stdout); +} diff --git a/fesvr/htif.h b/fesvr/htif.h new file mode 100644 index 0000000000..5b16a60db4 --- /dev/null +++ b/fesvr/htif.h @@ -0,0 +1,126 @@ +// See LICENSE for license details. 
+ +#ifndef __HTIF_H +#define __HTIF_H + +#include "memif.h" +#include "syscall.h" +#include "device.h" +#include +#include +#include + +class htif_t : public chunked_memif_t +{ + public: + htif_t(); + htif_t(int argc, char** argv); + htif_t(const std::vector& args); + virtual ~htif_t(); + + virtual void start(); + virtual void stop(); + + int run(); + bool done(); + int exit_code(); + + virtual memif_t& memif() { return mem; } + + protected: + virtual void reset() = 0; + + virtual void read_chunk(addr_t taddr, size_t len, void* dst) = 0; + virtual void write_chunk(addr_t taddr, size_t len, const void* src) = 0; + virtual void clear_chunk(addr_t taddr, size_t len); + + virtual size_t chunk_align() = 0; + virtual size_t chunk_max_size() = 0; + + virtual std::map load_payload(const std::string& payload, reg_t* entry); + virtual void load_program(); + virtual void idle() {} + + const std::vector& host_args() { return hargs; } + + reg_t get_entry_point() { return entry; } + + // indicates that the initial program load can skip writing this address + // range to memory, because it has already been loaded through a sideband + virtual bool is_address_preloaded(addr_t taddr, size_t len) { return false; } + + // Given an address, return symbol from addr2symbol map + const char* get_symbol(uint64_t addr); + + private: + void parse_arguments(int argc, char ** argv); + void register_devices(); + void usage(const char * program_name); + + memif_t mem; + reg_t entry; + bool writezeros; + std::vector hargs; + std::vector targs; + std::string sig_file; + addr_t sig_addr; // torture + addr_t sig_len; // torture + addr_t tohost_addr; + addr_t fromhost_addr; + int exitcode; + bool stopped; + + device_list_t device_list; + syscall_t syscall_proxy; + bcd_t bcd; + std::vector dynamic_devices; + std::vector payloads; + + const std::vector& target_args() { return targs; } + + std::map addr2symbol; + + friend class memif_t; + friend class syscall_t; +}; + +/* Alignment guide for emulator.cc 
options: + -x, --long-option Description with max 80 characters --------------->\n\ + +plus-arg-equivalent\n\ + */ +#define HTIF_USAGE_OPTIONS \ +"HOST OPTIONS\n\ + -h, --help Display this help and exit\n\ + +h, +help\n\ + +permissive The host will ignore any unparsed options up until\n\ + +permissive-off (Only needed for VCS)\n\ + +permissive-off Stop ignoring options. This is mandatory if using\n\ + +permissive (Only needed for VCS)\n\ + --rfb=DISPLAY Add new remote frame buffer on display DISPLAY\n\ + +rfb=DISPLAY to be accessible on 5900 + DISPLAY (default = 0)\n\ + --signature=FILE Write torture test signature to FILE\n\ + +signature=FILE\n\ + --chroot=PATH Use PATH as location of syscall-servicing binaries\n\ + +chroot=PATH\n\ + --payload=PATH Load PATH memory as an additional ELF payload\n\ + +payload=PATH\n\ +\n\ +HOST OPTIONS (currently unsupported)\n\ + --disk=DISK Add DISK device. Use a ramdisk since this isn't\n\ + +disk=DISK supported\n\ +\n\ +TARGET (RISC-V BINARY) OPTIONS\n\ + These are the options passed to the program executing on the emulated RISC-V\n\ + microprocessor.\n" + +#define HTIF_LONG_OPTIONS_OPTIND 1024 +#define HTIF_LONG_OPTIONS \ +{"help", no_argument, 0, 'h' }, \ +{"rfb", optional_argument, 0, HTIF_LONG_OPTIONS_OPTIND }, \ +{"disk", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 1 }, \ +{"signature", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 2 }, \ +{"chroot", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 3 }, \ +{"payload", required_argument, 0, HTIF_LONG_OPTIONS_OPTIND + 4 }, \ +{0, 0, 0, 0} + +#endif // __HTIF_H diff --git a/fesvr/htif_hexwriter.cc b/fesvr/htif_hexwriter.cc new file mode 100644 index 0000000000..e4811b3bee --- /dev/null +++ b/fesvr/htif_hexwriter.cc @@ -0,0 +1,76 @@ +// See LICENSE for license details. 
+ +#include +#include +#include "htif_hexwriter.h" + +htif_hexwriter_t::htif_hexwriter_t(size_t b, size_t w, size_t d) + : base(b), width(w), depth(d) +{ +} + +void htif_hexwriter_t::read_chunk(addr_t taddr, size_t len, void* vdst) +{ + taddr -= base; + + assert(len % chunk_align() == 0); + assert(taddr < width*depth); + assert(taddr+len <= width*depth); + + uint8_t* dst = (uint8_t*)vdst; + while(len) + { + if(mem[taddr/width].size() == 0) + mem[taddr/width].resize(width,0); + + for(size_t j = 0; j < width; j++) + dst[j] = mem[taddr/width][j]; + + len -= width; + taddr += width; + dst += width; + } +} + +void htif_hexwriter_t::write_chunk(addr_t taddr, size_t len, const void* vsrc) +{ + taddr -= base; + + assert(len % chunk_align() == 0); + assert(taddr < width*depth); + assert(taddr+len <= width*depth); + + const uint8_t* src = (const uint8_t*)vsrc; + while(len) + { + if(mem[taddr/width].size() == 0) + mem[taddr/width].resize(width,0); + + for(size_t j = 0; j < width; j++) + mem[taddr/width][j] = src[j]; + + len -= width; + taddr += width; + } +} + +std::ostream& operator<< (std::ostream& o, const htif_hexwriter_t& h) +{ + std::ios_base::fmtflags flags = o.setf(std::ios::hex,std::ios::basefield); + + for(size_t addr = 0; addr < h.depth; addr++) + { + std::map >::const_iterator i = h.mem.find(addr); + if(i == h.mem.end()) + for(size_t j = 0; j < h.width; j++) + o << "00"; + else + for(size_t j = 0; j < h.width; j++) + o << ((i->second[h.width-j-1] >> 4) & 0xF) << (i->second[h.width-j-1] & 0xF); + o << std::endl; + } + + o.setf(flags); + + return o; +} diff --git a/fesvr/htif_hexwriter.h b/fesvr/htif_hexwriter.h new file mode 100644 index 0000000000..725616626e --- /dev/null +++ b/fesvr/htif_hexwriter.h @@ -0,0 +1,32 @@ +// See LICENSE for license details. 
+ +#ifndef __HTIF_HEXWRITER_H +#define __HTIF_HEXWRITER_H + +#include +#include +#include +#include "memif.h" + +class htif_hexwriter_t : public chunked_memif_t +{ +public: + htif_hexwriter_t(size_t b, size_t w, size_t d); + +protected: + size_t base; + size_t width; + size_t depth; + std::map > mem; + + void read_chunk(addr_t taddr, size_t len, void* dst); + void write_chunk(addr_t taddr, size_t len, const void* src); + void clear_chunk(addr_t taddr, size_t len) {} + + size_t chunk_max_size() { return width; } + size_t chunk_align() { return width; } + + friend std::ostream& operator<< (std::ostream&, const htif_hexwriter_t&); +}; + +#endif // __HTIF_HEXWRITER_H diff --git a/fesvr/htif_pthread.cc b/fesvr/htif_pthread.cc new file mode 100644 index 0000000000..b9e3832b94 --- /dev/null +++ b/fesvr/htif_pthread.cc @@ -0,0 +1,66 @@ +// See LICENSE for license details. + +#include "htif_pthread.h" +#include +#include + +void htif_pthread_t::thread_main(void* arg) +{ + htif_pthread_t* htif = static_cast(arg); + htif->run(); + while (true) + htif->target->switch_to(); +} + +htif_pthread_t::htif_pthread_t(int argc, char** argv) + : htif_t(argc, argv) +{ + target = context_t::current(); + host.init(thread_main, this); +} + +htif_pthread_t::~htif_pthread_t() +{ +} + +ssize_t htif_pthread_t::read(void* buf, size_t max_size) +{ + while (th_data.size() == 0) + target->switch_to(); + + size_t s = std::min(max_size, th_data.size()); + std::copy(th_data.begin(), th_data.begin() + s, (char*)buf); + th_data.erase(th_data.begin(), th_data.begin() + s); + + return s; +} + +ssize_t htif_pthread_t::write(const void* buf, size_t size) +{ + ht_data.insert(ht_data.end(), (const char*)buf, (const char*)buf + size); + return size; +} + +void htif_pthread_t::send(const void* buf, size_t size) +{ + th_data.insert(th_data.end(), (const char*)buf, (const char*)buf + size); +} + +void htif_pthread_t::recv(void* buf, size_t size) +{ + while (!this->recv_nonblocking(buf, size)) + ; +} + +bool 
htif_pthread_t::recv_nonblocking(void* buf, size_t size) +{ + if (ht_data.size() < size) + { + host.switch_to(); + return false; + } + + std::copy(ht_data.begin(), ht_data.begin() + size, (char*)buf); + ht_data.erase(ht_data.begin(), ht_data.begin() + size); + return true; +} diff --git a/fesvr/htif_pthread.h b/fesvr/htif_pthread.h new file mode 100644 index 0000000000..c00c38230c --- /dev/null +++ b/fesvr/htif_pthread.h @@ -0,0 +1,38 @@ +// See LICENSE for license details. + +#ifndef _HTIF_PTHREAD_H +#define _HTIF_PTHREAD_H + +#include "htif.h" +#include "context.h" +#include + +class htif_pthread_t : public htif_t +{ + public: + htif_pthread_t(int argc, char** argv); + virtual ~htif_pthread_t(); + + // target inteface + void send(const void* buf, size_t size); + void recv(void* buf, size_t size); + bool recv_nonblocking(void* buf, size_t size); + + protected: + // host interface + virtual ssize_t read(void* buf, size_t max_size); + virtual ssize_t write(const void* buf, size_t size); + + virtual size_t chunk_align() { return 64; } + virtual size_t chunk_max_size() { return 1024; } + + private: + context_t host; + context_t* target; + std::deque th_data; + std::deque ht_data; + + static void thread_main(void* htif); +}; + +#endif diff --git a/fesvr/memif.cc b/fesvr/memif.cc new file mode 100644 index 0000000000..fd9629144f --- /dev/null +++ b/fesvr/memif.cc @@ -0,0 +1,183 @@ +// See LICENSE for license details. 
+ +#include +#include +#include +#include +#include "memif.h" + +void memif_t::read(addr_t addr, size_t len, void* bytes) +{ + size_t align = cmemif->chunk_align(); + if (len && (addr & (align-1))) + { + size_t this_len = std::min(len, align - size_t(addr & (align-1))); + uint8_t chunk[align]; + + cmemif->read_chunk(addr & ~(align-1), align, chunk); + memcpy(bytes, chunk + (addr & (align-1)), this_len); + + bytes = (char*)bytes + this_len; + addr += this_len; + len -= this_len; + } + + if (len & (align-1)) + { + size_t this_len = len & (align-1); + size_t start = len - this_len; + uint8_t chunk[align]; + + cmemif->read_chunk(addr + start, align, chunk); + memcpy((char*)bytes + start, chunk, this_len); + + len -= this_len; + } + + // now we're aligned + for (size_t pos = 0; pos < len; pos += cmemif->chunk_max_size()) + cmemif->read_chunk(addr + pos, std::min(cmemif->chunk_max_size(), len - pos), (char*)bytes + pos); +} + +void memif_t::write(addr_t addr, size_t len, const void* bytes) +{ + size_t align = cmemif->chunk_align(); + if (len && (addr & (align-1))) + { + size_t this_len = std::min(len, align - size_t(addr & (align-1))); + uint8_t chunk[align]; + + cmemif->read_chunk(addr & ~(align-1), align, chunk); + memcpy(chunk + (addr & (align-1)), bytes, this_len); + cmemif->write_chunk(addr & ~(align-1), align, chunk); + + bytes = (char*)bytes + this_len; + addr += this_len; + len -= this_len; + } + + if (len & (align-1)) + { + size_t this_len = len & (align-1); + size_t start = len - this_len; + uint8_t chunk[align]; + + cmemif->read_chunk(addr + start, align, chunk); + memcpy(chunk, (char*)bytes + start, this_len); + cmemif->write_chunk(addr + start, align, chunk); + + len -= this_len; + } + + // now we're aligned + bool all_zero = len != 0; + for (size_t i = 0; i < len; i++) + all_zero &= ((const char*)bytes)[i] == 0; + + if (all_zero) { + cmemif->clear_chunk(addr, len); + } else { + size_t max_chunk = cmemif->chunk_max_size(); + for (size_t pos = 0; pos < len; 
pos += max_chunk) + cmemif->write_chunk(addr + pos, std::min(max_chunk, len - pos), (char*)bytes + pos); + } +} + +#define MEMIF_READ_FUNC \ + if(addr & (sizeof(val)-1)) \ + throw std::runtime_error("misaligned address"); \ + this->read(addr, sizeof(val), &val); \ + return val + +#define MEMIF_WRITE_FUNC \ + if(addr & (sizeof(val)-1)) \ + throw std::runtime_error("misaligned address"); \ + this->write(addr, sizeof(val), &val) + +uint8_t memif_t::read_uint8(addr_t addr) +{ + uint8_t val; + MEMIF_READ_FUNC; +} + +int8_t memif_t::read_int8(addr_t addr) +{ + int8_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint8(addr_t addr, uint8_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int8(addr_t addr, int8_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint16_t memif_t::read_uint16(addr_t addr) +{ + uint16_t val; + MEMIF_READ_FUNC; +} + +int16_t memif_t::read_int16(addr_t addr) +{ + int16_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint16(addr_t addr, uint16_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int16(addr_t addr, int16_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint32_t memif_t::read_uint32(addr_t addr) +{ + uint32_t val; + MEMIF_READ_FUNC; +} + +int32_t memif_t::read_int32(addr_t addr) +{ + int32_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint32(addr_t addr, uint32_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int32(addr_t addr, int32_t val) +{ + MEMIF_WRITE_FUNC; +} + +uint64_t memif_t::read_uint64(addr_t addr) +{ + uint64_t val; + MEMIF_READ_FUNC; +} + +int64_t memif_t::read_int64(addr_t addr) +{ + int64_t val; + MEMIF_READ_FUNC; +} + +void memif_t::write_uint64(addr_t addr, uint64_t val) +{ + MEMIF_WRITE_FUNC; +} + +void memif_t::write_int64(addr_t addr, int64_t val) +{ + MEMIF_WRITE_FUNC; +} diff --git a/fesvr/memif.h b/fesvr/memif.h new file mode 100644 index 0000000000..3854d664c8 --- /dev/null +++ b/fesvr/memif.h @@ -0,0 +1,62 @@ +// See LICENSE for license details. 
#ifndef __MEMIF_H
#define __MEMIF_H

// NOTE(review): both include targets were stripped from the flattened
// source; restored from the names used (uint64_t/int64_t, size_t).
#include <stdint.h>
#include <stddef.h>

typedef uint64_t reg_t;
typedef int64_t sreg_t;
typedef reg_t addr_t;

// Chunk-granular access to target memory. Implementations state their
// transfer granularity through chunk_align() and the largest single
// transfer through chunk_max_size(); memif_t builds byte-granular access
// on top of this.
class chunked_memif_t
{
public:
  // Fix: polymorphic base had no virtual destructor, so destroying an
  // implementation through a chunked_memif_t* was undefined behaviour.
  virtual ~chunked_memif_t() {}

  virtual void read_chunk(addr_t taddr, size_t len, void* dst) = 0;
  virtual void write_chunk(addr_t taddr, size_t len, const void* src) = 0;
  virtual void clear_chunk(addr_t taddr, size_t len) = 0;

  virtual size_t chunk_align() = 0;
  virtual size_t chunk_max_size() = 0;
};

// Byte-granular view of target memory layered on a chunked_memif_t.
// The pointer passed to the constructor is stored, not owned: the
// destructor does not delete it.
class memif_t
{
public:
  memif_t(chunked_memif_t* _cmemif) : cmemif(_cmemif) {}
  virtual ~memif_t(){}

  // read and write byte arrays
  virtual void read(addr_t addr, size_t len, void* bytes);
  virtual void write(addr_t addr, size_t len, const void* bytes);

  // read and write 8-bit words
  virtual uint8_t read_uint8(addr_t addr);
  virtual int8_t read_int8(addr_t addr);
  virtual void write_uint8(addr_t addr, uint8_t val);
  virtual void write_int8(addr_t addr, int8_t val);

  // read and write 16-bit words
  virtual uint16_t read_uint16(addr_t addr);
  virtual int16_t read_int16(addr_t addr);
  virtual void write_uint16(addr_t addr, uint16_t val);
  virtual void write_int16(addr_t addr, int16_t val);

  // read and write 32-bit words
  virtual uint32_t read_uint32(addr_t addr);
  virtual int32_t read_int32(addr_t addr);
  virtual void write_uint32(addr_t addr, uint32_t val);
  virtual void write_int32(addr_t addr, int32_t val);

  // read and write 64-bit words
  virtual uint64_t read_uint64(addr_t addr);
  virtual int64_t read_int64(addr_t addr);
  virtual void write_uint64(addr_t addr, uint64_t val);
  virtual void write_int64(addr_t addr, int64_t val);

protected:
  chunked_memif_t* cmemif;
};

#endif // __MEMIF_H

// ---- header of the next file in the flattened patch, kept verbatim ----
// diff --git a/fesvr/option_parser.cc b/fesvr/option_parser.cc
// new file mode 100644
// index 0000000000..72daec40ef
// --- /dev/null
// +++ b/fesvr/option_parser.cc
// @@ -0,0 +1,51 @@
// See LICENSE for license details.
+ +#include "option_parser.h" +#include +#include +#include +#include + +void option_parser_t::option(char c, const char* s, int arg, std::function action) +{ + opts.push_back(option_t(c, s, arg, action)); +} + +const char* const* option_parser_t::parse(const char* const* argv0) +{ + assert(argv0); + const char* const* argv = argv0 + 1; + for (const char* opt; (opt = *argv) != NULL && opt[0] == '-'; argv++) + { + bool found = false; + for (auto it = opts.begin(); !found && it != opts.end(); it++) + { + size_t slen = it->str ? strlen(it->str) : 0; + bool chr_match = opt[1] != '-' && it->chr && opt[1] == it->chr; + bool str_match = opt[1] == '-' && slen && strncmp(opt+2, it->str, slen) == 0; + if (chr_match || (str_match && (opt[2+slen] == '=' || opt[2+slen] == '\0'))) + { + const char* optarg = + chr_match ? (opt[2] ? &opt[2] : NULL) : + opt[2+slen] ? &opt[3+slen] : + it->arg ? *(++argv) : NULL; + if (optarg && !it->arg) + error("no argument allowed for option", *argv0, opt); + if (!optarg && it->arg) + error("argument required for option", *argv0, opt); + it->func(optarg); + found = true; + } + } + if (!found) + error("unrecognized option", *argv0, opt); + } + return argv; +} + +void option_parser_t::error(const char* msg, const char* argv0, const char* arg) +{ + fprintf(stderr, "%s: %s %s\n", argv0, msg, arg ? arg : ""); + if (helpmsg) helpmsg(); + exit(1); +} diff --git a/fesvr/option_parser.h b/fesvr/option_parser.h new file mode 100644 index 0000000000..b2cb8edf9d --- /dev/null +++ b/fesvr/option_parser.h @@ -0,0 +1,31 @@ +// See LICENSE for license details. 
+ +#ifndef _OPTION_PARSER_H +#define _OPTION_PARSER_H + +#include +#include + +class option_parser_t +{ + public: + option_parser_t() : helpmsg(0) {} + void help(void (*helpm)(void)) { helpmsg = helpm; } + void option(char c, const char* s, int arg, std::function action); + const char* const* parse(const char* const* argv0); + private: + struct option_t + { + char chr; + const char* str; + int arg; + std::function func; + option_t(char chr, const char* str, int arg, std::function func) + : chr(chr), str(str), arg(arg), func(func) {} + }; + std::vector opts; + void (*helpmsg)(void); + void error(const char* msg, const char* argv0, const char* arg); +}; + +#endif diff --git a/fesvr/rfb.cc b/fesvr/rfb.cc new file mode 100644 index 0000000000..2594a1b871 --- /dev/null +++ b/fesvr/rfb.cc @@ -0,0 +1,230 @@ +#include "rfb.h" +#include "memif.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +rfb_t::rfb_t(int display) + : sockfd(-1), afd(-1), + memif(0), addr(0), width(0), height(0), bpp(0), display(display), + thread(pthread_self()), fb1(0), fb2(0), read_pos(0), + lock(PTHREAD_MUTEX_INITIALIZER) +{ + register_command(0, std::bind(&rfb_t::handle_configure, this, _1), "configure"); + register_command(1, std::bind(&rfb_t::handle_set_address, this, _1), "set_address"); +} + +void* rfb_thread_main(void* arg) +{ + ((rfb_t*)arg)->thread_main(); + return 0; +} + +void rfb_t::thread_main() +{ + pthread_mutex_lock(&lock); + + int port = 5900 + display; + sockfd = socket(PF_INET, SOCK_STREAM, 0); + if (sockfd < 0) + throw std::runtime_error("could not acquire tcp socket"); + + struct sockaddr_in saddr, caddr; + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = INADDR_ANY; + saddr.sin_port = htons(port); + if (bind(sockfd, (struct sockaddr*)&saddr, sizeof(saddr)) < 0) + throw std::runtime_error("could not bind to port " + std::to_string(port)); + if (listen(sockfd, 0) < 0) + throw 
std::runtime_error("could not listen on port " + std::to_string(port)); + + socklen_t clen = sizeof(caddr); + afd = accept(sockfd, (struct sockaddr*)&caddr, &clen); + if (afd < 0) + throw std::runtime_error("could not accept connection"); + + std::string version = "RFB 003.003\n"; + write(version); + if (read() != version) + throw std::runtime_error("bad client version"); + + write(str(uint32_t(htonl(1)))); + + read(); // clientinit + + std::string serverinit; + serverinit += str(uint16_t(htons(width))); + serverinit += str(uint16_t(htons(height))); + serverinit += pixel_format(); + std::string name = "RISC-V"; + serverinit += str(uint32_t(htonl(name.length()))); + serverinit += name; + write(serverinit); + + pthread_mutex_unlock(&lock); + + while (memif == NULL) + sched_yield(); + + while (memif != NULL) + { + std::string s = read(); + if (s.length() < 4) + break; //throw std::runtime_error("bad command"); + + switch (s[0]) + { + case 0: set_pixel_format(s); break; + case 2: set_encodings(s); break; + case 3: break; + } + } + + pthread_mutex_lock(&lock); + close(afd); + close(sockfd); + afd = -1; + sockfd = -1; + pthread_mutex_unlock(&lock); + + thread_main(); +} + +rfb_t::~rfb_t() +{ + memif = 0; + if (!pthread_equal(pthread_self(), thread)) + pthread_join(thread, 0); + delete [] fb1; + delete [] fb2; +} + +void rfb_t::set_encodings(const std::string& s) +{ + uint16_t n = htons(*(uint16_t*)&s[2]); + for (size_t b = s.length(); b < 4U+4U*n; b += read().length()); +} + +void rfb_t::set_pixel_format(const std::string& s) +{ + if (s.length() != 20 || s.substr(4, 16) != pixel_format()) + throw std::runtime_error("bad pixel format"); +} + +void rfb_t::fb_update(const std::string& s) +{ + std::string u; + u += str(uint8_t(0)); + u += str(uint8_t(0)); + u += str(uint16_t(htons(1))); + u += str(uint16_t(htons(0))); + u += str(uint16_t(htons(0))); + u += str(uint16_t(htons(width))); + u += str(uint16_t(htons(height))); + u += str(uint32_t(htonl(0))); + u += 
std::string((char*)fb1, fb_bytes()); + + try + { + write(u); + } + catch (std::runtime_error& e) + { + } +} + +void rfb_t::tick() +{ + if (fb_bytes() == 0 || memif == NULL) + return; + + memif->read(addr + read_pos, FB_ALIGN, const_cast(fb2 + read_pos)); + read_pos = (read_pos + FB_ALIGN) % fb_bytes(); + if (read_pos == 0) + { + std::swap(fb1, fb2); + if (pthread_mutex_trylock(&lock) == 0) + { + fb_update(""); + pthread_mutex_unlock(&lock); + } + } +} + +std::string rfb_t::pixel_format() +{ + int red_bits = 8, green_bits = 8, blue_bits = 8; + int bpp = red_bits + green_bits + blue_bits; + while (bpp & (bpp-1)) bpp++; + + std::string fmt; + fmt += str(uint8_t(bpp)); + fmt += str(uint8_t(red_bits + green_bits + blue_bits)); + fmt += str(uint8_t(0)); // little-endian + fmt += str(uint8_t(1)); // true color + fmt += str(uint16_t(htons((1<> 16; + + bpp = cmd.payload() >> 32; + if (bpp != 32) + throw std::runtime_error("rfb requires 32 bpp true color"); + + if (fb_bytes() % FB_ALIGN != 0) + throw std::runtime_error("rfb size must be a multiple of " + std::to_string(FB_ALIGN)); + + fb1 = new char[fb_bytes()]; + fb2 = new char[fb_bytes()]; + if (pthread_create(&thread, 0, rfb_thread_main, this)) + throw std::runtime_error("could not create thread"); + cmd.respond(1); +} + +void rfb_t::handle_set_address(command_t cmd) +{ + addr = cmd.payload(); + if (addr % FB_ALIGN != 0) + throw std::runtime_error("rfb address must be " + std::to_string(FB_ALIGN) + "-byte aligned"); + memif = &cmd.memif(); + cmd.respond(1); +} diff --git a/fesvr/rfb.h b/fesvr/rfb.h new file mode 100644 index 0000000000..263663a24d --- /dev/null +++ b/fesvr/rfb.h @@ -0,0 +1,53 @@ +#ifndef _RFB_H +#define _RFB_H + +#include "device.h" +#include "memif.h" +#include + +// remote frame buffer +class rfb_t : public device_t +{ + public: + rfb_t(int display = 0); + ~rfb_t(); + void tick(); + std::string name() { return "RISC-V"; } + const char* identity() { return "rfb"; } + + private: + template + std::string 
str(T x) + { + return std::string((char*)&x, sizeof(x)); + } + size_t fb_bytes() { return size_t(width) * height * bpp/8; } + void thread_main(); + friend void* rfb_thread_main(void*); + std::string pixel_format(); + void fb_update(const std::string& s); + void set_encodings(const std::string& s); + void set_pixel_format(const std::string& s); + void write(const std::string& s); + std::string read(); + void handle_configure(command_t cmd); + void handle_set_address(command_t cmd); + + int sockfd; + int afd; + memif_t* memif; + reg_t addr; + uint16_t width; + uint16_t height; + uint16_t bpp; + int display; + pthread_t thread; + volatile char* volatile fb1; + volatile char* volatile fb2; + size_t read_pos; + pthread_mutex_t lock; + + static const int FB_ALIGN = 256; +}; + +#endif diff --git a/fesvr/syscall.cc b/fesvr/syscall.cc new file mode 100644 index 0000000000..f0bdd259bd --- /dev/null +++ b/fesvr/syscall.cc @@ -0,0 +1,395 @@ +// See LICENSE for license details. + +#include "syscall.h" +#include "htif.h" +#include "byteorder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std::placeholders; + +#define RISCV_AT_FDCWD -100 + +struct riscv_stat +{ + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint64_t rdev; + uint64_t __pad1; + uint64_t size; + uint32_t blksize; + uint32_t __pad2; + uint64_t blocks; + uint64_t atime; + uint64_t __pad3; + uint64_t mtime; + uint64_t __pad4; + uint64_t ctime; + uint64_t __pad5; + uint32_t __unused4; + uint32_t __unused5; + + riscv_stat(const struct stat& s) + : dev(s.st_dev), ino(s.st_ino), mode(s.st_mode), nlink(s.st_nlink), + uid(s.st_uid), gid(s.st_gid), rdev(s.st_rdev), __pad1(0), + size(s.st_size), blksize(s.st_blksize), __pad2(0), + blocks(s.st_blocks), atime(s.st_atime), __pad3(0), + mtime(s.st_mtime), __pad4(0), ctime(s.st_ctime), __pad5(0), + __unused4(0), __unused5(0) {} +}; + 
+syscall_t::syscall_t(htif_t* htif) + : htif(htif), memif(&htif->memif()), table(2048) +{ + table[17] = &syscall_t::sys_getcwd; + table[25] = &syscall_t::sys_fcntl; + table[34] = &syscall_t::sys_mkdirat; + table[35] = &syscall_t::sys_unlinkat; + table[37] = &syscall_t::sys_linkat; + table[38] = &syscall_t::sys_renameat; + table[46] = &syscall_t::sys_ftruncate; + table[48] = &syscall_t::sys_faccessat; + table[49] = &syscall_t::sys_chdir; + table[56] = &syscall_t::sys_openat; + table[57] = &syscall_t::sys_close; + table[62] = &syscall_t::sys_lseek; + table[63] = &syscall_t::sys_read; + table[64] = &syscall_t::sys_write; + table[67] = &syscall_t::sys_pread; + table[68] = &syscall_t::sys_pwrite; + table[79] = &syscall_t::sys_fstatat; + table[80] = &syscall_t::sys_fstat; + table[93] = &syscall_t::sys_exit; + table[1039] = &syscall_t::sys_lstat; + table[2011] = &syscall_t::sys_getmainvars; + + register_command(0, std::bind(&syscall_t::handle_syscall, this, _1), "syscall"); + + int stdin_fd = dup(0), stdout_fd0 = dup(1), stdout_fd1 = dup(1); + if (stdin_fd < 0 || stdout_fd0 < 0 || stdout_fd1 < 0) + throw std::runtime_error("could not dup stdin/stdout"); + + fds.alloc(stdin_fd); // stdin -> stdin + fds.alloc(stdout_fd0); // stdout -> stdout + fds.alloc(stdout_fd1); // stderr -> stdout +} + +std::string syscall_t::do_chroot(const char* fn) +{ + if (!chroot.empty() && *fn == '/') + return chroot + fn; + return fn; +} + +std::string syscall_t::undo_chroot(const char* fn) +{ + if (chroot.empty()) + return fn; + if (strncmp(fn, chroot.c_str(), chroot.size()) == 0 + && (chroot.back() == '/' || fn[chroot.size()] == '/')) + return fn + chroot.size() - (chroot.back() == '/'); + return "/"; +} + +void syscall_t::handle_syscall(command_t cmd) +{ + if (cmd.payload() & 1) // test pass/fail + { + htif->exitcode = cmd.payload(); + if (htif->exit_code()) + std::cerr << "*** FAILED *** (tohost = " << htif->exit_code() << ")" << std::endl; + return; + } + else // proxied system call + 
dispatch(cmd.payload()); + + cmd.respond(1); +} + +reg_t syscall_t::sys_exit(reg_t code, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + htif->exitcode = code << 1 | 1; + return 0; +} + +static reg_t sysret_errno(sreg_t ret) +{ + return ret == -1 ? -errno : ret; +} + +reg_t syscall_t::sys_read(reg_t fd, reg_t pbuf, reg_t len, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + ssize_t ret = read(fds.lookup(fd), &buf[0], len); + reg_t ret_errno = sysret_errno(ret); + if (ret > 0) + memif->write(pbuf, ret, &buf[0]); + return ret_errno; +} + +reg_t syscall_t::sys_pread(reg_t fd, reg_t pbuf, reg_t len, reg_t off, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + ssize_t ret = pread(fds.lookup(fd), &buf[0], len, off); + reg_t ret_errno = sysret_errno(ret); + if (ret > 0) + memif->write(pbuf, ret, &buf[0]); + return ret_errno; +} + +reg_t syscall_t::sys_write(reg_t fd, reg_t pbuf, reg_t len, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + memif->read(pbuf, len, &buf[0]); + reg_t ret = sysret_errno(write(fds.lookup(fd), &buf[0], len)); + return ret; +} + +reg_t syscall_t::sys_pwrite(reg_t fd, reg_t pbuf, reg_t len, reg_t off, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(len); + memif->read(pbuf, len, &buf[0]); + reg_t ret = sysret_errno(pwrite(fds.lookup(fd), &buf[0], len, off)); + return ret; +} + +reg_t syscall_t::sys_close(reg_t fd, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + if (close(fds.lookup(fd)) < 0) + return sysret_errno(-1); + fds.dealloc(fd); + return 0; +} + +reg_t syscall_t::sys_lseek(reg_t fd, reg_t ptr, reg_t dir, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(lseek(fds.lookup(fd), ptr, dir)); +} + +reg_t syscall_t::sys_fstat(reg_t fd, reg_t pbuf, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + struct stat buf; + reg_t ret = sysret_errno(fstat(fds.lookup(fd), &buf)); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, 
sizeof(rbuf), &rbuf); + } + return ret; +} + +reg_t syscall_t::sys_fcntl(reg_t fd, reg_t cmd, reg_t arg, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(fcntl(fds.lookup(fd), cmd, arg)); +} + +reg_t syscall_t::sys_ftruncate(reg_t fd, reg_t len, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + return sysret_errno(ftruncate(fds.lookup(fd), len)); +} + +reg_t syscall_t::sys_lstat(reg_t pname, reg_t len, reg_t pbuf, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + + struct stat buf; + reg_t ret = sysret_errno(lstat(do_chroot(&name[0]).c_str(), &buf)); + riscv_stat rbuf(buf); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, sizeof(rbuf), &rbuf); + } + return ret; +} + +#define AT_SYSCALL(syscall, fd, name, ...) \ + (syscall(fds.lookup(fd), int(fd) == RISCV_AT_FDCWD ? do_chroot(name).c_str() : (name), __VA_ARGS__)) + +reg_t syscall_t::sys_openat(reg_t dirfd, reg_t pname, reg_t len, reg_t flags, reg_t mode, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + int fd = sysret_errno(AT_SYSCALL(openat, dirfd, &name[0], flags, mode)); + if (fd < 0) + return sysret_errno(-1); + return fds.alloc(fd); +} + +reg_t syscall_t::sys_fstatat(reg_t dirfd, reg_t pname, reg_t len, reg_t pbuf, reg_t flags, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + + struct stat buf; + reg_t ret = sysret_errno(AT_SYSCALL(fstatat, dirfd, &name[0], &buf, flags)); + if (ret != (reg_t)-1) + { + riscv_stat rbuf(buf); + memif->write(pbuf, sizeof(rbuf), &rbuf); + } + return ret; +} + +reg_t syscall_t::sys_faccessat(reg_t dirfd, reg_t pname, reg_t len, reg_t mode, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(faccessat, dirfd, &name[0], mode, 0)); +} + +reg_t syscall_t::sys_renameat(reg_t odirfd, reg_t popath, reg_t olen, reg_t ndirfd, reg_t pnpath, 
reg_t nlen, reg_t a6) +{ + std::vector opath(olen), npath(nlen); + memif->read(popath, olen, &opath[0]); + memif->read(pnpath, nlen, &npath[0]); + return sysret_errno(renameat(fds.lookup(odirfd), int(odirfd) == RISCV_AT_FDCWD ? do_chroot(&opath[0]).c_str() : &opath[0], + fds.lookup(ndirfd), int(ndirfd) == RISCV_AT_FDCWD ? do_chroot(&npath[0]).c_str() : &npath[0])); +} + +reg_t syscall_t::sys_linkat(reg_t odirfd, reg_t poname, reg_t olen, reg_t ndirfd, reg_t pnname, reg_t nlen, reg_t flags) +{ + std::vector oname(olen), nname(nlen); + memif->read(poname, olen, &oname[0]); + memif->read(pnname, nlen, &nname[0]); + return sysret_errno(linkat(fds.lookup(odirfd), int(odirfd) == RISCV_AT_FDCWD ? do_chroot(&oname[0]).c_str() : &oname[0], + fds.lookup(ndirfd), int(ndirfd) == RISCV_AT_FDCWD ? do_chroot(&nname[0]).c_str() : &nname[0], + flags)); +} + +reg_t syscall_t::sys_unlinkat(reg_t dirfd, reg_t pname, reg_t len, reg_t flags, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(unlinkat, dirfd, &name[0], flags)); +} + +reg_t syscall_t::sys_mkdirat(reg_t dirfd, reg_t pname, reg_t len, reg_t mode, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector name(len); + memif->read(pname, len, &name[0]); + return sysret_errno(AT_SYSCALL(mkdirat, dirfd, &name[0], mode)); +} + +reg_t syscall_t::sys_getcwd(reg_t pbuf, reg_t size, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector buf(size); + char* ret = getcwd(&buf[0], size); + if (ret == NULL) + return sysret_errno(-1); + std::string tmp = undo_chroot(&buf[0]); + if (size <= tmp.size()) + return -ENOMEM; + memif->write(pbuf, tmp.size() + 1, &tmp[0]); + return tmp.size() + 1; +} + +reg_t syscall_t::sys_getmainvars(reg_t pbuf, reg_t limit, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + std::vector args = htif->target_args(); + std::vector words(args.size() + 3); + words[0] = to_le(args.size()); + words[args.size()+1] = 0; // argv[argc] = NULL 
+ words[args.size()+2] = 0; // envp[0] = NULL + + size_t sz = (args.size() + 3) * sizeof(words[0]); + for (size_t i = 0; i < args.size(); i++) + { + words[i+1] = to_le(sz + pbuf); + sz += args[i].length() + 1; + } + + std::vector bytes(sz); + memcpy(&bytes[0], &words[0], sizeof(words[0]) * words.size()); + for (size_t i = 0; i < args.size(); i++) + strcpy(&bytes[from_le(words[i+1]) - pbuf], args[i].c_str()); + + if (bytes.size() > limit) + return -ENOMEM; + + memif->write(pbuf, bytes.size(), &bytes[0]); + return 0; +} + +reg_t syscall_t::sys_chdir(reg_t path, reg_t a1, reg_t a2, reg_t a3, reg_t a4, reg_t a5, reg_t a6) +{ + size_t size = 0; + while (memif->read_uint8(path + size++)) + ; + std::vector buf(size); + for (size_t offset = 0;; offset++) + { + buf[offset] = memif->read_uint8(path + offset); + if (!buf[offset]) + break; + } + return sysret_errno(chdir(buf.data())); +} + +void syscall_t::dispatch(reg_t mm) +{ + reg_t magicmem[8]; + memif->read(mm, sizeof(magicmem), magicmem); + + reg_t n = from_le(magicmem[0]); + if (n >= table.size() || !table[n]) + throw std::runtime_error("bad syscall #" + std::to_string(n)); + + magicmem[0] = to_le((this->*table[n])(from_le(magicmem[1]), from_le(magicmem[2]), from_le(magicmem[3]), from_le(magicmem[4]), from_le(magicmem[5]), from_le(magicmem[6]), from_le(magicmem[7]))); + + memif->write(mm, sizeof(magicmem), magicmem); +} + +reg_t fds_t::alloc(int fd) +{ + reg_t i; + for (i = 0; i < fds.size(); i++) + if (fds[i] == -1) + break; + + if (i == fds.size()) + fds.resize(i+1); + + fds[i] = fd; + return i; +} + +void fds_t::dealloc(reg_t fd) +{ + fds[fd] = -1; +} + +int fds_t::lookup(reg_t fd) +{ + if (int(fd) == RISCV_AT_FDCWD) + return AT_FDCWD; + return fd >= fds.size() ? 
-1 : fds[fd]; +} + +void syscall_t::set_chroot(const char* where) +{ + char buf1[PATH_MAX], buf2[PATH_MAX]; + + if (getcwd(buf1, sizeof(buf1)) == NULL + || chdir(where) != 0 + || getcwd(buf2, sizeof(buf2)) == NULL + || chdir(buf1) != 0) + { + fprintf(stderr, "could not chroot to %s\n", where); + exit(-1); + } + + chroot = buf2; +} diff --git a/fesvr/syscall.h b/fesvr/syscall.h new file mode 100644 index 0000000000..82946969b3 --- /dev/null +++ b/fesvr/syscall.h @@ -0,0 +1,72 @@ +// See LICENSE for license details. + +#ifndef __SYSCALL_H +#define __SYSCALL_H + +#include "device.h" +#include "memif.h" +#include +#include + +class syscall_t; +typedef reg_t (syscall_t::*syscall_func_t)(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + +class htif_t; +class memif_t; + +class fds_t +{ + public: + reg_t alloc(int fd); + void dealloc(reg_t fd); + int lookup(reg_t fd); + private: + std::vector fds; +}; + +class syscall_t : public device_t +{ + public: + syscall_t(htif_t*); + + void set_chroot(const char* where); + + private: + const char* identity() { return "syscall_proxy"; } + + htif_t* htif; + memif_t* memif; + std::vector table; + fds_t fds; + + void handle_syscall(command_t cmd); + void dispatch(addr_t mm); + + std::string chroot; + std::string do_chroot(const char* fn); + std::string undo_chroot(const char* fn); + + reg_t sys_exit(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_openat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_read(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_pread(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_write(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_pwrite(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_close(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_lseek(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_fstat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_lstat(reg_t, reg_t, reg_t, reg_t, reg_t, 
reg_t, reg_t); + reg_t sys_fstatat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_faccessat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_fcntl(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_ftruncate(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_renameat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_linkat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_unlinkat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_mkdirat(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_getcwd(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_getmainvars(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); + reg_t sys_chdir(reg_t, reg_t, reg_t, reg_t, reg_t, reg_t, reg_t); +}; + +#endif diff --git a/fesvr/term.cc b/fesvr/term.cc new file mode 100644 index 0000000000..c4cba0c07c --- /dev/null +++ b/fesvr/term.cc @@ -0,0 +1,53 @@ +#include "term.h" +#include +#include +#include +#include +#include + +class canonical_termios_t +{ + public: + canonical_termios_t() + : restore_tios(false) + { + if (tcgetattr(0, &old_tios) == 0) + { + struct termios new_tios = old_tios; + new_tios.c_lflag &= ~(ICANON | ECHO); + if (tcsetattr(0, TCSANOW, &new_tios) == 0) + restore_tios = true; + } + } + + ~canonical_termios_t() + { + if (restore_tios) + tcsetattr(0, TCSANOW, &old_tios); + } + private: + struct termios old_tios; + bool restore_tios; +}; + +static canonical_termios_t tios; // exit() will clean up for us + +int canonical_terminal_t::read() +{ + struct pollfd pfd; + pfd.fd = 0; + pfd.events = POLLIN; + int ret = poll(&pfd, 1, 0); + if (ret <= 0 || !(pfd.revents & POLLIN)) + return -1; + + unsigned char ch; + ret = ::read(0, &ch, 1); + return ret <= 0 ? 
-1 : ch; +} + +void canonical_terminal_t::write(char ch) +{ + if (::write(1, &ch, 1) != 1) + abort(); +} diff --git a/fesvr/term.h b/fesvr/term.h new file mode 100644 index 0000000000..7a2c22fc28 --- /dev/null +++ b/fesvr/term.h @@ -0,0 +1,11 @@ +#ifndef _TERM_H +#define _TERM_H + +class canonical_terminal_t +{ + public: + static int read(); + static void write(char); +}; + +#endif diff --git a/fesvr/tsi.cc b/fesvr/tsi.cc new file mode 100644 index 0000000000..5ccafc4b77 --- /dev/null +++ b/fesvr/tsi.cc @@ -0,0 +1,115 @@ +#include "tsi.h" +#include +#include + +#define NHARTS_MAX 16 + +void tsi_t::host_thread(void *arg) +{ + tsi_t *tsi = static_cast(arg); + tsi->run(); + + while (true) + tsi->target->switch_to(); +} + +tsi_t::tsi_t(int argc, char** argv) : htif_t(argc, argv) +{ + target = context_t::current(); + host.init(host_thread, this); +} + +tsi_t::~tsi_t(void) +{ +} + +#define MSIP_BASE 0x2000000 + +// Interrupt core 0 to make it start executing the program in DRAM +void tsi_t::reset() +{ + uint32_t one = 1; + + write_chunk(MSIP_BASE, sizeof(uint32_t), &one); +} + +void tsi_t::push_addr(addr_t addr) +{ + for (int i = 0; i < SAI_ADDR_CHUNKS; i++) { + in_data.push_back(addr & 0xffffffff); + addr = addr >> 32; + } +} + +void tsi_t::push_len(addr_t len) +{ + for (int i = 0; i < SAI_LEN_CHUNKS; i++) { + in_data.push_back(len & 0xffffffff); + len = len >> 32; + } +} + +void tsi_t::read_chunk(addr_t taddr, size_t nbytes, void* dst) +{ + uint32_t *result = static_cast(dst); + size_t len = nbytes / sizeof(uint32_t); + + in_data.push_back(SAI_CMD_READ); + push_addr(taddr); + push_len(len - 1); + + for (size_t i = 0; i < len; i++) { + while (out_data.empty()) + switch_to_target(); + result[i] = out_data.front(); + out_data.pop_front(); + } +} + +void tsi_t::write_chunk(addr_t taddr, size_t nbytes, const void* src) +{ + const uint32_t *src_data = static_cast(src); + size_t len = nbytes / sizeof(uint32_t); + + in_data.push_back(SAI_CMD_WRITE); + push_addr(taddr); + 
push_len(len - 1); + + in_data.insert(in_data.end(), src_data, src_data + len); +} + +void tsi_t::send_word(uint32_t word) +{ + out_data.push_back(word); +} + +uint32_t tsi_t::recv_word(void) +{ + uint32_t word = in_data.front(); + in_data.pop_front(); + return word; +} + +bool tsi_t::data_available(void) +{ + return !in_data.empty(); +} + +void tsi_t::switch_to_host(void) +{ + host.switch_to(); +} + +void tsi_t::switch_to_target(void) +{ + target->switch_to(); +} + +void tsi_t::tick(bool out_valid, uint32_t out_bits, bool in_ready) +{ + if (out_valid && out_ready()) + out_data.push_back(out_bits); + + if (in_valid() && in_ready) + in_data.pop_front(); +} diff --git a/fesvr/tsi.h b/fesvr/tsi.h new file mode 100644 index 0000000000..825a3a0038 --- /dev/null +++ b/fesvr/tsi.h @@ -0,0 +1,57 @@ +#ifndef __SAI_H +#define __SAI_H + +#include "htif.h" +#include "context.h" + +#include +#include +#include +#include + +#define SAI_CMD_READ 0 +#define SAI_CMD_WRITE 1 + +#define SAI_ADDR_CHUNKS 2 +#define SAI_LEN_CHUNKS 2 + +class tsi_t : public htif_t +{ + public: + tsi_t(int argc, char** argv); + virtual ~tsi_t(); + + bool data_available(); + void send_word(uint32_t word); + uint32_t recv_word(); + void switch_to_host(); + + uint32_t in_bits() { return in_data.front(); } + bool in_valid() { return !in_data.empty(); } + bool out_ready() { return true; } + void tick(bool out_valid, uint32_t out_bits, bool in_ready); + + protected: + void reset() override; + void read_chunk(addr_t taddr, size_t nbytes, void* dst) override; + void write_chunk(addr_t taddr, size_t nbytes, const void* src) override; + void switch_to_target(); + + size_t chunk_align() override { return 4; } + size_t chunk_max_size() override { return 1024; } + + int get_ipi_addrs(addr_t *addrs); + + private: + context_t host; + context_t* target; + std::deque in_data; + std::deque out_data; + + void push_addr(addr_t addr); + void push_len(addr_t len); + + static void host_thread(void *tsi); +}; + +#endif diff 
--git a/riscv-disasm.pc.in b/riscv-disasm.pc.in new file mode 100644 index 0000000000..8e022e930f --- /dev/null +++ b/riscv-disasm.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@prefix@ +libdir=${prefix}/@libdir@ +includedir=${prefix}/@includedir@ + +Name: riscv-disasm +Description: RISC-V disassembler +Version: git +Libs: -Wl,-rpath,${libdir} -L${libdir} -ldisasm +Cflags: -I${includedir} +URL: http://riscv.org/download.html#tab_disasm diff --git a/riscv-dummy_rocc.pc.in b/riscv-dummy_rocc.pc.in deleted file mode 100644 index 31635f6dad..0000000000 --- a/riscv-dummy_rocc.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-dummy_rocc -Description: Example RISC-V ROCC accelerator -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -ldummy_rocc -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-fesvr.pc.in b/riscv-fesvr.pc.in new file mode 100644 index 0000000000..efd7eed1e3 --- /dev/null +++ b/riscv-fesvr.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@prefix@ +libdir=${prefix}/@libdir@ +includedir=${prefix}/@includedir@ + +Name: riscv-fesvr +Description: RISC-V front-end server +Version: git +Libs: -Wl,-rpath,${libdir} -L${libdir} -lfesvr +Cflags: -I${includedir} +URL: http://riscv.org/download.html#tab_fesvr diff --git a/riscv-riscv.pc.in b/riscv-riscv.pc.in deleted file mode 100644 index 5e86b1c448..0000000000 --- a/riscv-riscv.pc.in +++ /dev/null @@ -1,11 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-riscv -Description: RISC-V -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -lriscv -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-softfloat.pc.in b/riscv-softfloat.pc.in deleted file mode 100644 index 6b18e88441..0000000000 --- a/riscv-softfloat.pc.in +++ /dev/null @@ -1,11 +0,0 @@ 
-prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-softfloat -Description: RISC-V softfloat library -Version: git -Libs: -Wl,-rpath,${libdir} -L${libdir} -lsoftfloat -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-spike.pc.in b/riscv-spike.pc.in deleted file mode 100644 index 007ad392bd..0000000000 --- a/riscv-spike.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-spike -Description: RISC-V spike meta library -Version: git -Depends: riscv-spike_main riscv-riscv riscv-softfloat -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv-spike_main.pc.in b/riscv-spike_main.pc.in deleted file mode 100644 index c9b0eccca8..0000000000 --- a/riscv-spike_main.pc.in +++ /dev/null @@ -1,12 +0,0 @@ -prefix=@prefix@ -exec_prefix=@prefix@ -libdir=${prefix}/@libdir@ -includedir=${prefix}/@includedir@ - -Name: riscv-spike_main -Description: RISC-V ISA simulator library -Version: git -Depends: riscv-riscv riscv-softfloat -Libs: -Wl,-rpath,${libdir} -L${libdir} -lspike_main -Cflags: -I${includedir} -URL: http://riscv.org/download.html#tab_spike diff --git a/riscv/arith.h b/riscv/arith.h new file mode 100644 index 0000000000..35dd53051e --- /dev/null +++ b/riscv/arith.h @@ -0,0 +1,123 @@ +// See LICENSE for license details. 
#ifndef _RISCV_ARITH_H
#define _RISCV_ARITH_H

#include <cassert>
#include <cstdint>
#include <climits>

// Returns the upper 64 bits of the 128-bit product of two unsigned 64-bit
// values. The product is assembled from 32x32-bit partial products so that no
// intermediate value exceeds 64 bits.
inline uint64_t mulhu(uint64_t a, uint64_t b)
{
  uint64_t t;
  uint32_t y1, y2, y3;
  uint64_t a0 = (uint32_t)a, a1 = a >> 32;
  uint64_t b0 = (uint32_t)b, b1 = b >> 32;

  t = a1*b0 + ((a0*b0) >> 32);
  y1 = t;
  y2 = t >> 32;

  t = a0*b1 + y1;
  y1 = t;

  t = a1*b1 + y2 + (t >> 32);
  y2 = t;
  y3 = t >> 32;

  return ((uint64_t)y3 << 32) | y2;
}

// Returns the upper 64 bits of the 128-bit product of two signed 64-bit
// values.
//
// The magnitudes are multiplied with mulhu() and the 128-bit result is negated
// when the signs differ. All intermediate arithmetic is unsigned: negating
// INT64_MIN or overflowing a signed multiplication is undefined behaviour,
// whereas the unsigned forms wrap modulo 2^64, which is exactly what is
// needed here.
inline int64_t mulh(int64_t a, int64_t b)
{
  const bool negate = (a < 0) != (b < 0);
  const uint64_t ua = a;
  const uint64_t ub = b;
  const uint64_t res = mulhu(a < 0 ? 0 - ua : ua, b < 0 ? 0 - ub : ub);

  // -(hi:lo) == (~hi + (lo == 0)) : (-lo). The low word of the product is
  // zero exactly when the wrapped product ua * ub is zero.
  return negate ? ~res + (ua * ub == 0) : res;
}

// Returns the upper 64 bits of the 128-bit product of a signed and an
// unsigned 64-bit value. Same scheme as mulh(), with only `a` carrying a sign.
inline int64_t mulhsu(int64_t a, uint64_t b)
{
  const bool negate = a < 0;
  const uint64_t ua = a;
  const uint64_t res = mulhu(a < 0 ? 0 - ua : ua, b);

  return negate ? ~res + (ua * b == 0) : res;
}

//ref: https://locklessinc.com/articles/sat_arithmetic/
// Signed saturating addition. T is the signed type, UT the unsigned type of
// the same width. Returns x + y clamped to the range of T and sets `sat` to
// true when clamping happened, false otherwise.
template<typename T, typename UT>
static inline T sat_add(T x, T y, bool &sat)
{
  UT ux = x;
  UT uy = y;
  UT res = ux + uy;
  sat = false;
  int sh = sizeof(T) * 8 - 1;

  /* Calculate overflowed result. (Don't change the sign bit of ux) */
  ux = (ux >> sh) + (((UT)0x1 << sh) - 1);

  /* Force compiler to use cmovns instruction */
  if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) {
    res = ux;
    sat = true;
  }

  return res;
}

// Signed saturating subtraction; same conventions as sat_add().
template<typename T, typename UT>
static inline T sat_sub(T x, T y, bool &sat)
{
  UT ux = x;
  UT uy = y;
  UT res = ux - uy;
  sat = false;
  int sh = sizeof(T) * 8 - 1;

  /* Calculate overflowed result. (Don't change the sign bit of ux) */
  ux = (ux >> sh) + (((UT)0x1 << sh) - 1);

  /* Force compiler to use cmovns instruction */
  if ((T) ((ux ^ uy) & (ux ^ res)) < 0) {
    res = ux;
    sat = true;
  }

  return res;
}

// Unsigned saturating addition: returns x + y, or the all-ones value of T
// when the sum wraps. `sat` reports whether that happened.
template<typename T>
T sat_addu(T x, T y, bool &sat)
{
  T res = x + y;

  sat = res < x;       // a wrapped sum is smaller than either operand
  res |= -(res < x);   // all ones when wrapped, unchanged otherwise

  return res;
}

// Unsigned saturating subtraction: returns x - y, or 0 when y > x.
// `sat` reports whether the result was clamped.
template<typename T>
T sat_subu(T x, T y, bool &sat)
{
  T res = x - y;

  sat = !(res <= x);   // a wrapped difference is larger than x
  res &= -(res <= x);  // zero when wrapped, unchanged otherwise

  return res;
}

// Returns the `len`-bit field of `val` that starts at bit `pos`, right
// aligned. Requires pos >= 0, len > 0 and pos + len <= 64.
static inline uint64_t extract64(uint64_t val, int pos, int len)
{
  assert(pos >= 0 && len > 0 && len <= 64 - pos);
  return (val >> pos) & (~UINT64_C(0) >> (64 - len));
}

// Returns a mask of `len` consecutive one bits starting at bit `pos`.
// Requires 0 <= pos < 64 and 0 < len <= 64; bits that would land above
// bit 63 are dropped.
static inline uint64_t make_mask64(int pos, int len)
{
  assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
  return (UINT64_MAX >> (64 - len)) << pos;
}
#endif
// Byte-order reversal for each fixed-width integer type. Single-byte values
// have nothing to reverse and are returned unchanged.
static inline uint8_t swap(uint8_t n)
{
  return n;
}

static inline uint16_t swap(uint16_t n)
{
  return __builtin_bswap16(n);
}

static inline uint32_t swap(uint32_t n)
{
  return __builtin_bswap32(n);
}

static inline uint64_t swap(uint64_t n)
{
  return __builtin_bswap64(n);
}

// The signed overloads reverse the bytes of the value's unsigned
// representation and convert the result back to the signed type.
static inline int8_t swap(int8_t n)
{
  return n;
}

static inline int16_t swap(int16_t n)
{
  return static_cast<int16_t>(swap(static_cast<uint16_t>(n)));
}

static inline int32_t swap(int32_t n)
{
  return static_cast<int32_t>(swap(static_cast<uint32_t>(n)));
}

static inline int64_t swap(int64_t n)
{
  return static_cast<int64_t>(swap(static_cast<uint64_t>(n)));
}

// Conversions between host byte order and an explicit byte order. The
// direction that matches the host is the identity; the other one swaps.
// NOTE(review): WORDS_BIGENDIAN is presumably provided by config.h -- confirm.
#ifdef WORDS_BIGENDIAN
template<typename T> static inline T to_be(T n) { return n; }
template<typename T> static inline T from_be(T n) { return n; }
template<typename T> static inline T to_le(T n) { return swap(n); }
template<typename T> static inline T from_le(T n) { return swap(n); }
#else
template<typename T> static inline T to_le(T n) { return n; }
template<typename T> static inline T from_le(T n) { return n; }
template<typename T> static inline T to_be(T n) { return swap(n); }
template<typename T> static inline T from_be(T n) { return swap(n); }
#endif
++i) @@ -63,7 +71,16 @@ bool clint_t::store(reg_t addr, size_t len, const uint8_t* bytes) void clint_t::increment(reg_t inc) { - mtime += inc; + if (real_time) { + struct timeval now; + uint64_t diff_usecs; + + gettimeofday(&now, NULL); + diff_usecs = ((now.tv_sec - real_time_ref_secs) * 1000000) + (now.tv_usec - real_time_ref_usecs); + mtime = diff_usecs * freq_hz / 1000000; + } else { + mtime += inc; + } for (size_t i = 0; i < procs.size(); i++) { procs[i]->state.mip &= ~MIP_MTIP; if (mtime >= mtimecmp[i]) diff --git a/riscv/common.h b/riscv/common.h index 8ddd9849d8..3c523d00d7 100644 --- a/riscv/common.h +++ b/riscv/common.h @@ -6,4 +6,6 @@ #define likely(x) __builtin_expect(x, 1) #define unlikely(x) __builtin_expect(x, 0) +#define NOINLINE __attribute__ ((noinline)) + #endif diff --git a/riscv/debug_defines.h b/riscv/debug_defines.h index d6ddd4ff1e..e6c2c5d3ea 100644 --- a/riscv/debug_defines.h +++ b/riscv/debug_defines.h @@ -84,8 +84,7 @@ /* * 0: Version described in spec version 0.11. * -* 1: Version described in spec version 0.13 (and later?), which -* reduces the DMI data width to 32 bits. +* 1: Version described in spec version 0.13. * * 15: Version not described in any available version of this spec. */ @@ -134,7 +133,7 @@ * cleared by writing \Fdmireset in \Rdtmcs. * * This indicates that the DM itself responded with an error. -* Note: there are no specified cases in which the DM would +* There are no specified cases in which the DM would * respond with an error, and DMI is not required to support * returning errors. * @@ -145,11 +144,6 @@ * needs to give the target more TCK edges between Update-DR and * Capture-DR. The simplest way to do that is to add extra transitions * in Run-Test/Idle. -* -* (The DTM, DM, and/or component may be in different clock domains, -* so synchronization may be required. 
Some relatively fixed number of -* TCK ticks may be needed for the request to reach the DM, complete, -* and for the response to be synchronized back into the TCK domain.) */ #define DTM_DMI_OP_OFFSET 0 #define DTM_DMI_OP_LENGTH 2 @@ -167,20 +161,28 @@ #define CSR_DCSR_XDEBUGVER_LENGTH 4 #define CSR_DCSR_XDEBUGVER (0xfU << CSR_DCSR_XDEBUGVER_OFFSET) /* -* When 1, {\tt ebreak} instructions in Machine Mode enter Debug Mode. +* 0: {\tt ebreak} instructions in M-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in M-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKM_OFFSET 15 #define CSR_DCSR_EBREAKM_LENGTH 1 #define CSR_DCSR_EBREAKM (0x1U << CSR_DCSR_EBREAKM_OFFSET) /* -* When 1, {\tt ebreak} instructions in Supervisor Mode enter Debug Mode. +* 0: {\tt ebreak} instructions in S-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in S-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKS_OFFSET 13 #define CSR_DCSR_EBREAKS_LENGTH 1 #define CSR_DCSR_EBREAKS (0x1U << CSR_DCSR_EBREAKS_OFFSET) /* -* When 1, {\tt ebreak} instructions in User/Application Mode enter -* Debug Mode. +* 0: {\tt ebreak} instructions in U-mode behave as described in the +* Privileged Spec. +* +* 1: {\tt ebreak} instructions in U-mode enter Debug Mode. */ #define CSR_DCSR_EBREAKU_OFFSET 12 #define CSR_DCSR_EBREAKU_LENGTH 1 @@ -191,9 +193,10 @@ * 1: Interrupts are enabled during single stepping. * * Implementations may hard wire this bit to 0. -* The debugger must read back the value it -* writes to check whether the feature is supported. If not -* supported, interrupt behavior can be emulated by the debugger. +* In that case interrupt behavior can be emulated by the debugger. +* +* The debugger must not change the value of this bit while the hart +* is running. */ #define CSR_DCSR_STEPIE_OFFSET 11 #define CSR_DCSR_STEPIE_LENGTH 1 @@ -201,14 +204,13 @@ /* * 0: Increment counters as usual. 
* -* 1: Don't increment any counters while in Debug Mode or on {\tt -* ebreak} instructions that cause entry into Debug Mode. These -* counters include the {\tt cycle} and {\tt instret} CSRs. This is -* preferred for most debugging scenarios. +* 1: Don't increment any hart-local counters while in Debug Mode or +* on {\tt ebreak} instructions that cause entry into Debug Mode. +* These counters include the {\tt instret} CSR. On single-hart cores +* {\tt cycle} should be stopped, but on multi-hart cores it must keep +* incrementing. * -* An implementation may choose not to support writing to this bit. -* The debugger must read back the value it writes to check whether -* the feature is supported. +* An implementation may hardwire this bit to 0 or 1. */ #define CSR_DCSR_STOPCOUNT_OFFSET 10 #define CSR_DCSR_STOPCOUNT_LENGTH 1 @@ -218,9 +220,7 @@ * * 1: Don't increment any hart-local timers while in Debug Mode. * -* An implementation may choose not to support writing to this bit. -* The debugger must read back the value it writes to check whether -* the feature is supported. +* An implementation may hardwire this bit to 0 or 1. */ #define CSR_DCSR_STOPTIME_OFFSET 9 #define CSR_DCSR_STOPTIME_LENGTH 1 @@ -236,9 +236,16 @@ * * 2: The Trigger Module caused a breakpoint exception. (priority 4) * -* 3: The debugger requested entry to Debug Mode. (priority 2) +* 3: The debugger requested entry to Debug Mode using \Fhaltreq. +* (priority 1) * -* 4: The hart single stepped because \Fstep was set. (priority 1) +* 4: The hart single stepped because \Fstep was set. (priority 0, lowest) +* +* 5: The hart halted directly out of reset due to \Fresethaltreq. It +* is also acceptable to report 3 when this happens. (priority 2) +* +* 6: The hart halted because it's part of a halt group. (priority 5, +* highest) Harts may report 3 for this cause instead. * * Other values are reserved for future use. 
// Returns the number of bits needed to encode the values 0 .. n-1:
// 1->0, 2->1, 3->2, 4->2
// For n == 0 there is nothing to encode and the width is 0.
static unsigned field_width(unsigned n)
{
  // Without this guard n - 1 wraps to UINT_MAX and the result becomes the
  // full width of unsigned, which is not usable as a shift count.
  if (n == 0)
    return 0;

  unsigned width = 0;
  for (unsigned remaining = n - 1; remaining != 0; remaining >>= 1)
    width++;
  return width;
}
4 : 0) + 4*config.progbufsize), debug_progbuf_start(debug_data_start - program_buffer_bytes), debug_abstract_start(debug_progbuf_start - debug_abstract_size*4), custom_base(0), - sim(sim) + hartsellen(field_width(sim->nprocs())), + sim(sim), + // The spec lets a debugger select nonexistent harts. Create hart_state for + // them because I'm too lazy to add the code to just ignore accesses. + hart_state(1 << field_width(sim->nprocs())), + hart_array_mask(sim->nprocs()), + rti_remaining(0) { D(fprintf(stderr, "debug_data_start=0x%x\n", debug_data_start)); D(fprintf(stderr, "debug_progbuf_start=0x%x\n", debug_progbuf_start)); D(fprintf(stderr, "debug_abstract_start=0x%x\n", debug_abstract_start)); + assert(nprocs <= 1024); + program_buffer = new uint8_t[program_buffer_bytes]; - memset(halted, 0, sizeof(halted)); memset(debug_rom_flags, 0, sizeof(debug_rom_flags)); - memset(resumeack, 0, sizeof(resumeack)); - memset(havereset, 0, sizeof(havereset)); memset(program_buffer, 0, program_buffer_bytes); - program_buffer[4*progbufsize] = ebreak(); - program_buffer[4*progbufsize+1] = ebreak() >> 8; - program_buffer[4*progbufsize+2] = ebreak() >> 16; - program_buffer[4*progbufsize+3] = ebreak() >> 24; memset(dmdata, 0, sizeof(dmdata)); + if (config.support_impebreak) { + program_buffer[4*config.progbufsize] = ebreak(); + program_buffer[4*config.progbufsize+1] = ebreak() >> 8; + program_buffer[4*config.progbufsize+2] = ebreak() >> 16; + program_buffer[4*config.progbufsize+3] = ebreak() >> 24; + } + write32(debug_rom_whereto, 0, jal(ZERO, debug_abstract_start - DEBUG_ROM_WHERETO)); @@ -60,37 +80,38 @@ debug_module_t::~debug_module_t() void debug_module_t::reset() { + assert(sim->nprocs() > 0); for (unsigned i = 0; i < sim->nprocs(); i++) { processor_t *proc = sim->get_core(i); if (proc) - proc->halt_request = false; + proc->halt_request = proc->HR_NONE; } dmcontrol = {0}; dmstatus = {0}; - dmstatus.impebreak = true; - dmstatus.authenticated = !require_authentication; + 
dmstatus.impebreak = config.support_impebreak; + dmstatus.authenticated = !config.require_authentication; dmstatus.version = 2; abstractcs = {0}; abstractcs.datacount = sizeof(dmdata) / 4; - abstractcs.progbufsize = progbufsize; + abstractcs.progbufsize = config.progbufsize; abstractauto = {0}; sbcs = {0}; - if (max_bus_master_bits > 0) { + if (config.max_bus_master_bits > 0) { sbcs.version = 1; sbcs.asize = sizeof(reg_t) * 8; } - if (max_bus_master_bits >= 64) + if (config.max_bus_master_bits >= 64) sbcs.access64 = true; - if (max_bus_master_bits >= 32) + if (config.max_bus_master_bits >= 32) sbcs.access32 = true; - if (max_bus_master_bits >= 16) + if (config.max_bus_master_bits >= 16) sbcs.access16 = true; - if (max_bus_master_bits >= 8) + if (config.max_bus_master_bits >= 8) sbcs.access8 = true; challenge = random(); @@ -135,8 +156,8 @@ bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes) return true; } - fprintf(stderr, "ERROR: invalid load from debug module: %zd bytes at 0x%016" - PRIx64 "\n", len, addr); + D(fprintf(stderr, "ERROR: invalid load from debug module: %zd bytes at 0x%016" + PRIx64 "\n", len, addr)); return false; } @@ -179,11 +200,24 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) if (addr == DEBUG_ROM_HALTED) { assert (len == 4); - halted[id] = true; + if (!hart_state[id].halted) { + hart_state[id].halted = true; + if (hart_state[id].haltgroup) { + for (unsigned i = 0; i < nprocs; i++) { + if (!hart_state[i].halted && + hart_state[i].haltgroup == hart_state[id].haltgroup) { + processor_t *proc = sim->get_core(i); + proc->halt_request = proc->HR_GROUP; + // TODO: What if the debugger comes and writes dmcontrol before the + // halt occurs? 
+ } + } + } + } if (dmcontrol.hartsel == id) { if (0 == (debug_rom_flags[id] & (1 << DEBUG_ROM_FLAG_GO))){ if (dmcontrol.hartsel == id) { - abstractcs.busy = false; + abstract_command_completed = true; } } } @@ -191,14 +225,15 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) } if (addr == DEBUG_ROM_GOING) { - debug_rom_flags[dmcontrol.hartsel] &= ~(1 << DEBUG_ROM_FLAG_GO); + assert(len == 4); + debug_rom_flags[id] &= ~(1 << DEBUG_ROM_FLAG_GO); return true; } if (addr == DEBUG_ROM_RESUMING) { assert (len == 4); - halted[id] = false; - resumeack[id] = true; + hart_state[id].halted = false; + hart_state[id].resumeack = true; debug_rom_flags[id] &= ~(1 << DEBUG_ROM_FLAG_RESUME); return true; } @@ -210,8 +245,8 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) return true; } - fprintf(stderr, "ERROR: invalid store to debug module: %zd bytes at 0x%016" - PRIx64 "\n", len, addr); + D(fprintf(stderr, "ERROR: invalid store to debug module: %zd bytes at 0x%016" + PRIx64 "\n", len, addr)); return false; } @@ -234,16 +269,25 @@ uint32_t debug_module_t::read32(uint8_t *memory, unsigned int index) return value; } -processor_t *debug_module_t::current_proc() const +processor_t *debug_module_t::processor(unsigned hartid) const { processor_t *proc = NULL; try { - proc = sim->get_core(dmcontrol.hartsel); + proc = sim->get_core(hartid); } catch (const std::out_of_range&) { } return proc; } +bool debug_module_t::hart_selected(unsigned hartid) const +{ + if (dmcontrol.hasel) { + return hartid == dmcontrol.hartsel || hart_array_mask[hartid]; + } else { + return hartid == dmcontrol.hartsel; + } +} + unsigned debug_module_t::sb_access_bits() { return 8 << sbcs.sbaccess; @@ -251,7 +295,7 @@ unsigned debug_module_t::sb_access_bits() void debug_module_t::sb_autoincrement() { - if (!sbcs.autoincrement || !max_bus_master_bits) + if (!sbcs.autoincrement || !config.max_bus_master_bits) return; uint64_t value = sbaddress[0] + sb_access_bits() / 
8; @@ -273,13 +317,13 @@ void debug_module_t::sb_read() { reg_t address = ((uint64_t) sbaddress[1] << 32) | sbaddress[0]; try { - if (sbcs.sbaccess == 0 && max_bus_master_bits >= 8) { + if (sbcs.sbaccess == 0 && config.max_bus_master_bits >= 8) { sbdata[0] = sim->debug_mmu->load_uint8(address); - } else if (sbcs.sbaccess == 1 && max_bus_master_bits >= 16) { + } else if (sbcs.sbaccess == 1 && config.max_bus_master_bits >= 16) { sbdata[0] = sim->debug_mmu->load_uint16(address); - } else if (sbcs.sbaccess == 2 && max_bus_master_bits >= 32) { + } else if (sbcs.sbaccess == 2 && config.max_bus_master_bits >= 32) { sbdata[0] = sim->debug_mmu->load_uint32(address); - } else if (sbcs.sbaccess == 3 && max_bus_master_bits >= 64) { + } else if (sbcs.sbaccess == 3 && config.max_bus_master_bits >= 64) { uint64_t value = sim->debug_mmu->load_uint64(address); sbdata[0] = value; sbdata[1] = value >> 32; @@ -295,13 +339,13 @@ void debug_module_t::sb_write() { reg_t address = ((uint64_t) sbaddress[1] << 32) | sbaddress[0]; D(fprintf(stderr, "sb_write() 0x%x @ 0x%lx\n", sbdata[0], address)); - if (sbcs.sbaccess == 0 && max_bus_master_bits >= 8) { + if (sbcs.sbaccess == 0 && config.max_bus_master_bits >= 8) { sim->debug_mmu->store_uint8(address, sbdata[0]); - } else if (sbcs.sbaccess == 1 && max_bus_master_bits >= 16) { + } else if (sbcs.sbaccess == 1 && config.max_bus_master_bits >= 16) { sim->debug_mmu->store_uint16(address, sbdata[0]); - } else if (sbcs.sbaccess == 2 && max_bus_master_bits >= 32) { + } else if (sbcs.sbaccess == 2 && config.max_bus_master_bits >= 32) { sim->debug_mmu->store_uint32(address, sbdata[0]); - } else if (sbcs.sbaccess == 3 && max_bus_master_bits >= 64) { + } else if (sbcs.sbaccess == 3 && config.max_bus_master_bits >= 64) { sim->debug_mmu->store_uint64(address, (((uint64_t) sbdata[1]) << 32) | sbdata[0]); } else { @@ -318,7 +362,7 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) result = read32(dmdata, i); if (abstractcs.busy) { result = 
-1; - fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i); + D(fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i)); } if (abstractcs.busy && abstractcs.cmderr == CMDERR_NONE) { @@ -328,12 +372,12 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) if (!abstractcs.busy && ((abstractauto.autoexecdata >> i) & 1)) { perform_abstract_command(); } - } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + progbufsize) { + } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + config.progbufsize) { unsigned i = address - DMI_PROGBUF0; result = read32(program_buffer, i); if (abstractcs.busy) { result = -1; - fprintf(stderr, "\ndmi_read(0x%02x (progbuf[%d]) -> -1 because abstractcs.busy==true\n", address, i); + D(fprintf(stderr, "\ndmi_read(0x%02x (progbuf[%d]) -> -1 because abstractcs.busy==true\n", address, i)); } if (!abstractcs.busy && ((abstractauto.autoexecprogbuf >> i) & 1)) { perform_abstract_command(); @@ -343,14 +387,11 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) switch (address) { case DMI_DMCONTROL: { - processor_t *proc = current_proc(); - if (proc) - dmcontrol.haltreq = proc->halt_request; - result = set_field(result, DMI_DMCONTROL_HALTREQ, dmcontrol.haltreq); result = set_field(result, DMI_DMCONTROL_RESUMEREQ, dmcontrol.resumereq); result = set_field(result, DMI_DMCONTROL_HARTSELHI, dmcontrol.hartsel >> DMI_DMCONTROL_HARTSELLO_LENGTH); + result = set_field(result, DMI_DMCONTROL_HASEL, dmcontrol.hasel); result = set_field(result, DMI_DMCONTROL_HARTSELLO, dmcontrol.hartsel); result = set_field(result, DMI_DMCONTROL_HARTRESET, dmcontrol.hartreset); result = set_field(result, DMI_DMCONTROL_NDMRESET, dmcontrol.ndmreset); @@ -359,42 +400,45 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) break; case DMI_DMSTATUS: { - processor_t *proc = current_proc(); - - dmstatus.allnonexistant = false; - 
dmstatus.allunavail = false; - dmstatus.allrunning = false; - dmstatus.allhalted = false; - dmstatus.allresumeack = false; - if (proc) { - if (halted[dmcontrol.hartsel]) { - dmstatus.allhalted = true; - } else { - dmstatus.allrunning = true; - } - } else { - dmstatus.allnonexistant = true; - } - dmstatus.anynonexistant = dmstatus.allnonexistant; - dmstatus.anyunavail = dmstatus.allunavail; - dmstatus.anyrunning = dmstatus.allrunning; - dmstatus.anyhalted = dmstatus.allhalted; - if (proc) { - if (resumeack[dmcontrol.hartsel]) { - dmstatus.allresumeack = true; - } else { - dmstatus.allresumeack = false; + dmstatus.allhalted = true; + dmstatus.anyhalted = false; + dmstatus.allrunning = true; + dmstatus.anyrunning = false; + dmstatus.allnonexistant = true; + dmstatus.allresumeack = true; + dmstatus.anyresumeack = false; + for (unsigned i = 0; i < nprocs; i++) { + if (hart_selected(i)) { + dmstatus.allnonexistant = false; + if (hart_state[i].resumeack) { + dmstatus.anyresumeack = true; + } else { + dmstatus.allresumeack = false; + } + if (hart_state[i].halted) { + dmstatus.allrunning = false; + dmstatus.anyhalted = true; + } else { + dmstatus.allhalted = false; + dmstatus.anyrunning = true; + } } - } else { - dmstatus.allresumeack = false; } + // We don't allow selecting non-existant harts through + // hart_array_mask, so the only way it's possible is by writing a + // non-existant hartsel. 
+ dmstatus.anynonexistant = (dmcontrol.hartsel >= nprocs); + + dmstatus.allunavail = false; + dmstatus.anyunavail = false; + result = set_field(result, DMI_DMSTATUS_IMPEBREAK, dmstatus.impebreak); result = set_field(result, DMI_DMSTATUS_ALLHAVERESET, - havereset[dmcontrol.hartsel]); + hart_state[dmcontrol.hartsel].havereset); result = set_field(result, DMI_DMSTATUS_ANYHAVERESET, - havereset[dmcontrol.hartsel]); + hart_state[dmcontrol.hartsel].havereset); result = set_field(result, DMI_DMSTATUS_ALLNONEXISTENT, dmstatus.allnonexistant); result = set_field(result, DMI_DMSTATUS_ALLUNAVAIL, dmstatus.allunavail); result = set_field(result, DMI_DMSTATUS_ALLRUNNING, dmstatus.allrunning); @@ -430,6 +474,20 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) result = set_field(result, DMI_HARTINFO_DATASIZE, abstractcs.datacount); result = set_field(result, DMI_HARTINFO_DATAADDR, debug_data_start); break; + case DMI_HAWINDOWSEL: + result = hawindowsel; + break; + case DMI_HAWINDOW: + { + unsigned base = hawindowsel * 32; + for (unsigned i = 0; i < 32; i++) { + unsigned n = base + i; + if (n < nprocs && hart_array_mask[n]) { + result |= 1 << i; + } + } + } + break; case DMI_SBCS: result = set_field(result, DMI_SBCS_SBVERSION, sbcs.version); result = set_field(result, DMI_SBCS_SBREADONADDR, sbcs.readonaddr); @@ -459,10 +517,12 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) case DMI_SBDATA0: result = sbdata[0]; if (sbcs.error == 0) { - sb_autoincrement(); if (sbcs.readondata) { sb_read(); } + if (sbcs.error == 0) { + sb_autoincrement(); + } } break; case DMI_SBDATA1: @@ -477,6 +537,10 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) case DMI_AUTHDATA: result = challenge; break; + case DMI_DMCS2: + result = set_field(result, DMI_DMCS2_HALTGROUP, + hart_state[dmcontrol.hartsel].haltgroup); + break; default: result = 0; D(fprintf(stderr, "Unexpected. 
Returning Error.")); @@ -488,6 +552,22 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) return true; } +void debug_module_t::run_test_idle() +{ + if (rti_remaining > 0) { + rti_remaining--; + } + if (rti_remaining == 0 && abstractcs.busy && abstract_command_completed) { + abstractcs.busy = false; + } +} + +static bool is_fpu_reg(unsigned regno) +{ + return (regno >= 0x1020 && regno <= 0x103f) || regno == CSR_FFLAGS || + regno == CSR_FRM || regno == CSR_FCSR; +} + bool debug_module_t::perform_abstract_command() { if (abstractcs.cmderr != CMDERR_NONE) @@ -499,11 +579,11 @@ bool debug_module_t::perform_abstract_command() if ((command >> 24) == 0) { // register access - unsigned size = get_field(command, AC_ACCESS_REGISTER_SIZE); + unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE); bool write = get_field(command, AC_ACCESS_REGISTER_WRITE); unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO); - if (!halted[dmcontrol.hartsel]) { + if (!hart_state[dmcontrol.hartsel].halted) { abstractcs.cmderr = CMDERR_HALTRESUME; return true; } @@ -511,10 +591,22 @@ bool debug_module_t::perform_abstract_command() unsigned i = 0; if (get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { - if (regno < 0x1000 && progbufsize < 2) { - // Make the debugger use the program buffer if it's available, so it - // can test both use cases. 
- write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH)); + if (is_fpu_reg(regno)) { + // Save S0 + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + // Save mstatus + write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS)); + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1)); + // Set mstatus.fs + assert((MSTATUS_FS & 0xfff) == 0); + write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12)); + write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS)); + } + + if (regno < 0x1000 && config.support_abstract_csr_access) { + if (!is_fpu_reg(regno)) { + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + } if (write) { switch (size) { @@ -544,7 +636,9 @@ bool debug_module_t::perform_abstract_command() return true; } } - write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH)); + if (!is_fpu_reg(regno)) { + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } } else if (regno >= 0x1000 && regno < 0x1020) { unsigned regnum = regno - 0x1000; @@ -568,9 +662,6 @@ bool debug_module_t::perform_abstract_command() } } else if (regno >= 0x1020 && regno < 0x1040) { - // Don't force the debugger to use progbuf if it exists, so the - // debugger has to make the decision not to use abstract commands to - // access 64-bit FPRs on 32-bit targets. 
unsigned fprnum = regno - 0x1020; if (write) { @@ -618,6 +709,14 @@ bool debug_module_t::perform_abstract_command() abstractcs.cmderr = CMDERR_NOTSUP; return true; } + + if (is_fpu_reg(regno)) { + // restore mstatus + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1)); + write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS)); + // restore s0 + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } } if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) { @@ -629,6 +728,8 @@ bool debug_module_t::perform_abstract_command() } debug_rom_flags[dmcontrol.hartsel] |= 1 << DEBUG_ROM_FLAG_GO; + rti_remaining = config.abstract_rti; + abstract_command_completed = false; abstractcs.busy = true; } else { @@ -659,7 +760,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) } return true; - } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + progbufsize) { + } else if (address >= DMI_PROGBUF0 && address < DMI_PROGBUF0 + config.progbufsize) { unsigned i = address - DMI_PROGBUF0; if (!abstractcs.busy) @@ -677,35 +778,47 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) if (!dmcontrol.dmactive && get_field(value, DMI_DMCONTROL_DMACTIVE)) reset(); dmcontrol.dmactive = get_field(value, DMI_DMCONTROL_DMACTIVE); - if (!dmstatus.authenticated) + if (!dmstatus.authenticated || !dmcontrol.dmactive) return true; - if (dmcontrol.dmactive) { - dmcontrol.haltreq = get_field(value, DMI_DMCONTROL_HALTREQ); - dmcontrol.resumereq = get_field(value, DMI_DMCONTROL_RESUMEREQ); - dmcontrol.hartreset = get_field(value, DMI_DMCONTROL_HARTRESET); - dmcontrol.ndmreset = get_field(value, DMI_DMCONTROL_NDMRESET); - dmcontrol.hartsel = get_field(value, DMI_DMCONTROL_HARTSELHI) << - DMI_DMCONTROL_HARTSELLO_LENGTH; - dmcontrol.hartsel |= get_field(value, DMI_DMCONTROL_HARTSELLO); - dmcontrol.hartsel &= (1L<halt_request = dmcontrol.haltreq; - if (dmcontrol.resumereq) { - debug_rom_flags[dmcontrol.hartsel] |= (1 << DEBUG_ROM_FLAG_RESUME); - 
resumeack[dmcontrol.hartsel] = false; + + dmcontrol.haltreq = get_field(value, DMI_DMCONTROL_HALTREQ); + dmcontrol.resumereq = get_field(value, DMI_DMCONTROL_RESUMEREQ); + dmcontrol.hartreset = get_field(value, DMI_DMCONTROL_HARTRESET); + dmcontrol.ndmreset = get_field(value, DMI_DMCONTROL_NDMRESET); + if (config.support_hasel) + dmcontrol.hasel = get_field(value, DMI_DMCONTROL_HASEL); + else + dmcontrol.hasel = 0; + dmcontrol.hartsel = get_field(value, DMI_DMCONTROL_HARTSELHI) << + DMI_DMCONTROL_HARTSELLO_LENGTH; + dmcontrol.hartsel |= get_field(value, DMI_DMCONTROL_HARTSELLO); + dmcontrol.hartsel &= (1L<halt_request = dmcontrol.haltreq ? proc->HR_REGULAR : proc->HR_NONE; + if (dmcontrol.haltreq) { + D(fprintf(stderr, "halt hart %d\n", i)); + } + if (dmcontrol.resumereq) { + D(fprintf(stderr, "resume hart %d\n", i)); + debug_rom_flags[i] |= (1 << DEBUG_ROM_FLAG_RESUME); + hart_state[i].resumeack = false; + } + if (dmcontrol.hartreset) { + proc->reset(); + } + } } - if (dmcontrol.hartreset) { - proc->reset(); - } } + if (dmcontrol.ndmreset) { for (size_t i = 0; i < sim->nprocs(); i++) { - proc = sim->get_core(i); + processor_t *proc = sim->get_core(i); proc->reset(); } } @@ -716,6 +829,22 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) command = value; return perform_abstract_command(); + case DMI_HAWINDOWSEL: + hawindowsel = value & ((1U<> i) & 1; + } + } + } + return true; + case DMI_ABSTRACTCS: abstractcs.cmderr = (cmderr_t) (((uint32_t) (abstractcs.cmderr)) & (~(uint32_t)(get_field(value, DMI_ABSTRACTCS_CMDERR)))); return true; @@ -737,6 +866,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) sbaddress[0] = value; if (sbcs.error == 0 && sbcs.readonaddr) { sb_read(); + sb_autoincrement(); } return true; case DMI_SBADDRESS1: @@ -752,7 +882,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) sbdata[0] = value; if (sbcs.error == 0) { sb_write(); - if (sbcs.autoincrement && sbcs.error == 0) { + if (sbcs.error 
== 0) { sb_autoincrement(); } } @@ -769,7 +899,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) case DMI_AUTHDATA: D(fprintf(stderr, "debug authentication: got 0x%x; 0x%x unlocks\n", value, challenge + secret)); - if (require_authentication) { + if (config.require_authentication) { if (value == challenge + secret) { dmstatus.authenticated = true; } else { @@ -778,6 +908,12 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) } } return true; + case DMI_DMCS2: + if (config.support_haltgroups && get_field(value, DMI_DMCS2_HGWRITE)) { + hart_state[dmcontrol.hartsel].haltgroup = get_field(value, + DMI_DMCS2_HALTGROUP); + } + return true; } } return false; @@ -785,6 +921,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) void debug_module_t::proc_reset(unsigned id) { - havereset[id] = true; - halted[id] = false; + hart_state[id].havereset = true; + hart_state[id].halted = false; + hart_state[id].haltgroup = 0; } diff --git a/riscv/debug_module.h b/riscv/debug_module.h index 5b43ed628c..2bcdba4228 100644 --- a/riscv/debug_module.h +++ b/riscv/debug_module.h @@ -8,9 +8,23 @@ class sim_t; +typedef struct { + // Size of program_buffer in 32-bit words, as exposed to the rest of the + // world. + unsigned progbufsize; + unsigned max_bus_master_bits; + bool require_authentication; + unsigned abstract_rti; + bool support_hasel; + bool support_abstract_csr_access; + bool support_haltgroups; + bool support_impebreak; +} debug_module_config_t; + typedef struct { bool haltreq; bool resumereq; + bool hasel; unsigned hartsel; bool hartreset; bool dmactive; @@ -73,6 +87,13 @@ typedef struct { bool access8; } sbcs_t; +typedef struct { + bool halted; + bool resumeack; + bool havereset; + uint8_t haltgroup; +} hart_debug_state_t; + class debug_module_t : public abstract_device_t { public: @@ -81,9 +102,11 @@ class debug_module_t : public abstract_device_t * follows: * 1. Read a 32-bit value from authdata: * 2. 
Write the value that was read back, plus one, to authdata. + * + * abstract_rti is extra run-test/idle cycles that each abstract command + * takes to execute. Useful for testing OpenOCD. */ - debug_module_t(sim_t *sim, unsigned progbufsize, unsigned max_bus_master_bits, - bool require_authentication); + debug_module_t(sim_t *sim, const debug_module_config_t &config); ~debug_module_t(); void add_device(bus_t *bus); @@ -97,23 +120,23 @@ class debug_module_t : public abstract_device_t bool dmi_read(unsigned address, uint32_t *value); bool dmi_write(unsigned address, uint32_t value); + // Called for every cycle the JTAG TAP spends in Run-Test/Idle. + void run_test_idle(); + // Called when one of the attached harts was reset. void proc_reset(unsigned id); private: static const unsigned datasize = 2; - // Size of program_buffer in 32-bit words, as exposed to the rest of the - // world. - unsigned progbufsize; + unsigned nprocs; + debug_module_config_t config; // Actual size of the program buffer, which is 1 word bigger than we let on // to implement the implicit ebreak at the end. unsigned program_buffer_bytes; - unsigned max_bus_master_bits; - bool require_authentication; static const unsigned debug_data_start = 0x380; unsigned debug_progbuf_start; - static const unsigned debug_abstract_size = 5; + static const unsigned debug_abstract_size = 12; unsigned debug_abstract_start; // R/W this through custom registers, to allow debuggers to test that // functionality. @@ -121,7 +144,7 @@ class debug_module_t : public abstract_device_t // We only support 1024 harts currently. More requires at least resizing // the arrays below, and their corresponding special memory regions. 
- static const unsigned hartsellen = 10; + unsigned hartsellen = 10; sim_t *sim; @@ -130,9 +153,7 @@ class debug_module_t : public abstract_device_t uint8_t *program_buffer; uint8_t dmdata[datasize * 4]; - bool halted[1024]; - bool resumeack[1024]; - bool havereset[1024]; + std::vector hart_state; uint8_t debug_rom_flags[1024]; void write32(uint8_t *rom, unsigned int index, uint32_t value); @@ -148,6 +169,8 @@ class debug_module_t : public abstract_device_t abstractcs_t abstractcs; abstractauto_t abstractauto; uint32_t command; + uint16_t hawindowsel; + std::vector hart_array_mask; sbcs_t sbcs; uint32_t sbaddress[4]; @@ -156,9 +179,13 @@ class debug_module_t : public abstract_device_t uint32_t challenge; const uint32_t secret = 1; - processor_t *current_proc() const; + processor_t *processor(unsigned hartid) const; + bool hart_selected(unsigned hartid) const; void reset(); bool perform_abstract_command(); + + bool abstract_command_completed; + unsigned rti_remaining; }; #endif diff --git a/riscv/decode.h b/riscv/decode.h index f9e3b6f649..3bb74996b4 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -7,10 +7,7 @@ # error spike requires a two''s-complement c++ implementation #endif -#ifdef WORDS_BIGENDIAN -# error spike requires a little-endian host -#endif - +#include #include #include #include @@ -24,13 +21,25 @@ typedef int64_t sreg_t; typedef uint64_t reg_t; +#ifdef __SIZEOF_INT128__ +typedef __int128 int128_t; +typedef unsigned __int128 uint128_t; +#endif + const int NXPR = 32; const int NFPR = 32; +const int NVPR = 32; const int NCSR = 4096; #define X_RA 1 #define X_SP 2 +#define VCSR_VXRM_SHIFT 1 +#define VCSR_VXRM (0x3 << VCSR_VXRM_SHIFT) + +#define VCSR_VXSAT_SHIFT 0 +#define VCSR_VXSAT (0x1 << VCSR_VXSAT_SHIFT) + #define FP_RD_NE 0 #define FP_RD_0 1 #define FP_RD_DN 2 @@ -68,7 +77,7 @@ class insn_t public: insn_t() = default; insn_t(insn_bits_t bits) : b(bits) {} - insn_bits_t bits() { return b; } + insn_bits_t bits() { return b & ~((UINT64_MAX) << 
(length() * 8)); } int length() { return insn_length(b); } int64_t i_imm() { return int64_t(b) >> 20; } int64_t shamt() { return x(20, 6); } @@ -101,6 +110,36 @@ class insn_t uint64_t rvc_rs2() { return x(2, 5); } uint64_t rvc_rs1s() { return 8 + x(7, 3); } uint64_t rvc_rs2s() { return 8 + x(2, 3); } + + uint64_t v_vm() { return x(25, 1); } + uint64_t v_wd() { return x(26, 1); } + uint64_t v_nf() { return x(29, 3); } + uint64_t v_simm5() { return xs(15, 5); } + uint64_t v_zimm5() { return x(15, 5); } + uint64_t v_zimm11() { return x(20, 11); } + uint64_t v_lmul() { return x(20, 2); } + uint64_t v_frac_lmul() { return x(22, 1); } + uint64_t v_sew() { return 1 << (x(23, 3) + 3); } + uint64_t v_width() { return x(12, 3); } + uint64_t v_mop() { return x(26, 2); } + uint64_t v_lumop() { return x(20, 5); } + uint64_t v_sumop() { return x(20, 5); } + uint64_t v_vta() { return x(26, 1); } + uint64_t v_vma() { return x(27, 1); } + uint64_t v_mew() { return x(28, 1); } + + // Xpulpimg + uint64_t p_uimmL() { return x(20, 12); } // [31:20] same bits as I-type imm12 + uint64_t p_uimmS() { return x(15, 5); } // [19:15] same bits as RS1 + uint64_t p_loop() { return x(7, 1); } // [7] called 'L' in the docs + uint64_t p_Luimm5() { return x(25, 5); } // [29:25] + uint64_t p_zimm5() { return x(20, 5); } // [24:20] + int64_t p_simm5() { return xs(20, 5); } // [24:20] + uint64_t p_rs3() { return x(7, 5); } // [11:07] alias for RD + uint64_t p_zimm6() { return x(25,1) + (x(20, 5) << 1); } + int64_t p_simm6() { return x(25,1) + (xs(20, 5) << 1); } + + private: insn_bits_t b; uint64_t x(int lo, int len) { return (b >> lo) & ((insn_bits_t(1) << len)-1); } @@ -121,6 +160,14 @@ class regfile_t { return data[i]; } + regfile_t() + { + reset(); + } + void reset() + { + memset(data, 0, sizeof(data)); + } private: T data[N]; }; @@ -128,26 +175,38 @@ class regfile_t // helpful macros, etc #define MMU (*p->get_mmu()) #define STATE (*p->get_state()) +#define P (*p) +#define FLEN (p->get_flen()) 
#define READ_REG(reg) STATE.XPR[reg] #define READ_FREG(reg) STATE.FPR[reg] +#define RD READ_REG(insn.rd()) #define RS1 READ_REG(insn.rs1()) #define RS2 READ_REG(insn.rs2()) +#define RS3 READ_REG(insn.rs3()) #define WRITE_RD(value) WRITE_REG(insn.rd(), value) #ifndef RISCV_ENABLE_COMMITLOG # define WRITE_REG(reg, value) STATE.XPR.write(reg, value) # define WRITE_FREG(reg, value) DO_WRITE_FREG(reg, freg(value)) +# define WRITE_VSTATUS #else + /* 0 : int + * 1 : floating + * 2 : vector reg + * 3 : vector hint + * 4 : csr + */ # define WRITE_REG(reg, value) ({ \ reg_t wdata = (value); /* value may have side effects */ \ - STATE.log_reg_write = (commit_log_reg_t){(reg) << 1, {wdata, 0}}; \ + STATE.log_reg_write[(reg) << 4] = {wdata, 0}; \ STATE.XPR.write(reg, wdata); \ }) # define WRITE_FREG(reg, value) ({ \ freg_t wdata = freg(value); /* value may have side effects */ \ - STATE.log_reg_write = (commit_log_reg_t){((reg) << 1) | 1, wdata}; \ + STATE.log_reg_write[((reg) << 4) | 1] = wdata; \ DO_WRITE_FREG(reg, wdata); \ }) +# define WRITE_VSTATUS STATE.log_reg_write[3] = {0, 0}; #endif // RVC macros @@ -168,34 +227,85 @@ class regfile_t #define FRS3 READ_FREG(insn.rs3()) #define dirty_fp_state (STATE.mstatus |= MSTATUS_FS | (xlen == 64 ? MSTATUS64_SD : MSTATUS32_SD)) #define dirty_ext_state (STATE.mstatus |= MSTATUS_XS | (xlen == 64 ? MSTATUS64_SD : MSTATUS32_SD)) +#define dirty_vs_state (STATE.mstatus |= MSTATUS_VS | (xlen == 64 ? 
MSTATUS64_SD : MSTATUS32_SD)) #define DO_WRITE_FREG(reg, value) (STATE.FPR.write(reg, value), dirty_fp_state) #define WRITE_FRD(value) WRITE_FREG(insn.rd(), value) - + #define SHAMT (insn.i_imm() & 0x3F) #define BRANCH_TARGET (pc + insn.sb_imm()) #define JUMP_TARGET (pc + insn.uj_imm()) #define RM ({ int rm = insn.rm(); \ if(rm == 7) rm = STATE.frm; \ - if(rm > 4) throw trap_illegal_instruction(0); \ + if(rm > 4) throw trap_illegal_instruction(insn.bits()); \ rm; }) #define get_field(reg, mask) (((reg) & (decltype(reg))(mask)) / ((mask) & ~((mask) << 1))) #define set_field(reg, mask, val) (((reg) & ~(decltype(reg))(mask)) | (((decltype(reg))(val) * ((mask) & ~((mask) << 1))) & (decltype(reg))(mask))) -#define require(x) if (unlikely(!(x))) throw trap_illegal_instruction(0) +#define require(x) if (unlikely(!(x))) throw trap_illegal_instruction(insn.bits()) #define require_privilege(p) require(STATE.prv >= (p)) +#define require_novirt() if (unlikely(STATE.v)) throw trap_virtual_instruction(insn.bits()) #define require_rv64 require(xlen == 64) #define require_rv32 require(xlen == 32) #define require_extension(s) require(p->supports_extension(s)) #define require_fp require((STATE.mstatus & MSTATUS_FS) != 0) #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0) +#define require_vector_vs require((STATE.mstatus & MSTATUS_VS) != 0); +#define require_vector(alu) \ + do { \ + require_vector_vs; \ + require_extension('V'); \ + require(!P.VU.vill); \ + if (alu && !P.VU.vstart_alu) \ + require(P.VU.vstart == 0); \ + WRITE_VSTATUS; \ + dirty_vs_state; \ + } while (0); +#define require_vector_novtype(is_log, alu) \ + do { \ + require_vector_vs; \ + require_extension('V'); \ + if (alu && !P.VU.vstart_alu) \ + require(P.VU.vstart == 0); \ + if (is_log) \ + WRITE_VSTATUS; \ + dirty_vs_state; \ + } while (0); +#define require_align(val, pos) require(is_aligned(val, pos)) +#define require_noover(astart, asize, bstart, bsize) \ + require(!is_overlapped(astart, asize, 
bstart, bsize)) +#define require_noover_widen(astart, asize, bstart, bsize) \ + require(!is_overlapped_widen(astart, asize, bstart, bsize)) +#define require_vm do { if (insn.v_vm() == 0) require(insn.rd() != 0);} while(0); + #define set_fp_exceptions ({ if (softfloat_exceptionFlags) { \ dirty_fp_state; \ STATE.fflags |= softfloat_exceptionFlags; \ } \ softfloat_exceptionFlags = 0; }) +// Xpulpimg macros +#define sext16(x) ((sreg_t)(int16_t)(x)) +#define zext16(x) ((reg_t)(uint16_t)(x)) + +#define sext8(x) ((sreg_t)(int8_t)(x)) +#define zext8(x) ((reg_t)(uint8_t)(x)) + +#define sextr(x, u, l) ( (sreg_t)( ((sreg_t)x) << (63-(u)) >> ((l)+63-(u)) ) ) // sext(x[u:l]) +#define zextr(x, u, l) ((reg_t)( ((x) >> l) & ( (1 << ((u)-(l)+1))-1 ) )) // zext(x[u:l]) + +#define P_RS3 READ_REG(insn.p_rs3()) /* same as RD, just different semantical value */ +#define WRITE_RS1(value) WRITE_REG(insn.rs1(), value) + +#define RS1_H(i) ((RS1 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs1 half: i should only be 0 or 1 */ +#define RS1_B(i) ((RS1 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs1 byte: i should only be from 0 to 3 */ +#define RS2_H(i) ((RS2 >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rs2 half: i should only be 0 or 1 */ +#define RS2_B(i) ((RS2 >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rs2 byte: i should only be from 0 to 3 */ +#define RD_H(i) ((RD >> ((xlen >> 1) * (i & 0x1))) & 0xFFFF) /* select rd half: i should only be 0 or 1 */ +#define RD_B(i) ((RD >> ((xlen >> 2) * (i & 0x3))) & 0xFF) /* select rd byte: i should only be from 0 to 3 */ + + #define sext32(x) ((sreg_t)(int32_t)(x)) #define zext32(x) ((reg_t)(uint32_t)(x)) #define sext_xlen(x) (((sreg_t)(x) << (64-xlen)) >> (64-xlen)) @@ -212,9 +322,12 @@ class regfile_t STATE.pc = __npc; \ } while(0) +class wait_for_interrupt_t {}; + #define wfi() \ do { set_pc_and_serialize(npc); \ npc = PC_SERIALIZE_WFI; \ + throw wait_for_interrupt_t(); \ } while(0) #define serialize() set_pc_and_serialize(npc) @@ 
-226,21 +339,29 @@ class regfile_t #define invalid_pc(pc) ((pc) & 1) /* Convenience wrappers to simplify softfloat code sequences */ +#define isBoxedF16(r) (isBoxedF32(r) && ((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48))) +#define unboxF16(r) (isBoxedF16(r) ? (uint16_t)r.v[0] : defaultNaNF16UI) #define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0)) #define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI) #define isBoxedF64(r) ((r.v[1] + 1) == 0) #define unboxF64(r) (isBoxedF64(r) ? r.v[0] : defaultNaNF64UI) typedef float128_t freg_t; +inline float16_t f16(uint16_t v) { return { v }; } inline float32_t f32(uint32_t v) { return { v }; } inline float64_t f64(uint64_t v) { return { v }; } +inline float16_t f16(freg_t r) { return f16(unboxF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } +inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; } inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; } inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; } inline freg_t freg(float128_t f) { return f; } +#define F16_SIGN ((uint16_t)1 << 15) #define F32_SIGN ((uint32_t)1 << 31) #define F64_SIGN ((uint64_t)1 << 63) +#define fsgnj16(a, b, n, x) \ + f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN)) #define fsgnj32(a, b, n, x) \ f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? 
F32_SIGN : 0) ^ f32(b).v) & F32_SIGN)) #define fsgnj64(a, b, n, x) \ @@ -268,14 +389,2034 @@ inline freg_t f128_negate(freg_t a) #define validate_csr(which, write) ({ \ if (!STATE.serialized) return PC_SERIALIZE_BEFORE; \ STATE.serialized = false; \ - unsigned csr_priv = get_field((which), 0x300); \ - unsigned csr_read_only = get_field((which), 0xC00) == 3; \ - if (((write) && csr_read_only) || STATE.prv < csr_priv) \ - throw trap_illegal_instruction(0); \ + /* permissions check occurs in get_csr */ \ (which); }) -// Seems that 0x0 doesn't work. -#define DEBUG_START 0x100 -#define DEBUG_END (0x1000 - 1) +/* For debug only. This will fail if the native machine's float types are not IEEE */ +inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;} +inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;} +inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;} + + +// Interpret register as packed SIMD +union simd_reg { + reg_t reg; + sreg_t sreg; + // halfwords (signed and unsigned) + int16_t h[4]; + uint16_t hu[4]; + // bytes (signed and unsigned) + int8_t b[8]; + uint8_t bu[8]; +}; + + +// Vector macros +#define e8 8 // 8b elements +#define e16 16 // 16b elements +#define e32 32 // 32b elements +#define e64 64 // 64b elements +#define e128 128 // 128b elements +#define e256 256 // 256b elements +#define e512 512 // 512b elements +#define e1024 1024 // 1024b elements + +#define vsext(x, sew) ( ((sreg_t)(x) << (64-(sew))) >> (64-(sew)) ) +#define vzext(x, sew) ( ((reg_t)(x) << (64-(sew))) >> (64-(sew)) ) + +#define DEBUG_RVV 0 + +#if DEBUG_RVV +#define DEBUG_RVV_FP_VV \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2)); +#define DEBUG_RVV_FP_VF \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2)); +#define DEBUG_RVV_FMA_VV \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2), to_f(vd_old)); +#define 
DEBUG_RVV_FMA_VF \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old)); +#else +#define DEBUG_RVV_FP_VV 0 +#define DEBUG_RVV_FP_VF 0 +#define DEBUG_RVV_FMA_VV 0 +#define DEBUG_RVV_FMA_VF 0 +#endif + +// +// vector: masking skip helper +// +#define VI_MASK_VARS \ + const int midx = i / 64; \ + const int mpos = i % 64; + +#define VI_LOOP_ELEMENT_SKIP(BODY) \ + VI_MASK_VARS \ + if (insn.v_vm() == 0) { \ + BODY; \ + bool skip = ((P.VU.elt(0, midx) >> mpos) & 0x1) == 0; \ + if (skip) {\ + continue; \ + }\ + } + +#define VI_ELEMENT_SKIP(inx) \ + if (inx >= vl) { \ + continue; \ + } else if (inx < P.VU.vstart) { \ + continue; \ + } else { \ + VI_LOOP_ELEMENT_SKIP(); \ + } + +// +// vector: operation and register acccess check helper +// +static inline bool is_overlapped(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} + +static inline bool is_overlapped_widen(const int astart, int asize, + const int bstart, int bsize) +{ + asize = asize == 0 ? 1 : asize; + bsize = bsize == 0 ? 1 : bsize; + + const int aend = astart + asize; + const int bend = bstart + bsize; + + if (astart < bstart && + is_overlapped(astart, asize, bstart, bsize) && + !is_overlapped(astart, asize, bstart + bsize, bsize)) { + return false; + } else { + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; + } +} + +static inline bool is_aligned(const unsigned val, const unsigned pos) +{ + return pos ? 
(val & (pos - 1)) == 0 : true; +} + +#define VI_NARROW_CHECK_COMMON \ + require_vector(true);\ + require(P.VU.vflmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require_align(insn.rs2(), P.VU.vflmul * 2); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vm; \ + +#define VI_WIDE_CHECK_COMMON \ + require_vector(true);\ + require(P.VU.vflmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require_align(insn.rd(), P.VU.vflmul * 2); \ + require_vm; \ + +#define VI_CHECK_ST_INDEX(elt_width) \ + require_vector(false); \ + float vemul = ((float)elt_width / P.VU.vsew * P.VU.vflmul); \ + require(vemul >= 0.125 && vemul <= 8); \ + reg_t emul = vemul < 1 ? 1 : vemul; \ + reg_t flmul = P.VU.vflmul < 1 ? 1 : P.VU.vflmul; \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), vemul); \ + require((nf * flmul) <= (NVPR / 4) && \ + (insn.rd() + nf * flmul) <= NVPR); \ + +#define VI_CHECK_LD_INDEX(elt_width) \ + VI_CHECK_ST_INDEX(elt_width); \ + if (elt_width > P.VU.vsew) { \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else if (elt_width < P.VU.vsew) { \ + if (vemul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } \ + } \ + if (insn.v_nf() > 0) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + require_noover(vd, nf, insn.rs2(), 1); \ + } \ + require_vm; \ + +#define VI_CHECK_MSS(is_vs1) \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), 1, insn.rs2(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (is_vs1) {\ + if (insn.rd() != insn.rs1()) \ + require_noover(insn.rd(), 1, insn.rs1(), P.VU.vflmul); \ + require_align(insn.rs1(), P.VU.vflmul); \ + } \ + +#define VI_CHECK_SSS(is_vs1) \ + require_vm; \ + if (P.VU.vflmul > 1) { \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (is_vs1) { \ + 
require_align(insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_STORE(elt_width) \ + require_vector(false); \ + reg_t veew = sizeof(elt_width##_t) * 8; \ + float vemul = ((float)veew / P.VU.vsew * P.VU.vflmul); \ + reg_t emul = vemul < 1 ? 1 : vemul; \ + require(vemul >= 0.125 && vemul <= 8); \ + require_align(insn.rd(), vemul); \ + require((nf * emul) <= (NVPR / 4) && \ + (insn.rd() + nf * emul) <= NVPR); \ + +#define VI_CHECK_LOAD(elt_width) \ + VI_CHECK_STORE(elt_width); \ + require_vm; \ + +#define VI_CHECK_DSS(is_vs1) \ + VI_WIDE_CHECK_COMMON; \ + require_align(insn.rs2(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + } \ + if (is_vs1) {\ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_QSS(is_vs1) \ + require_vector(true);\ + p->supports_extension(EXT_ZVQMAC); \ + require(P.VU.vflmul <= 2); \ + require(P.VU.vsew * 4 <= P.VU.ELEN); \ + require_align(insn.rd(), P.VU.vflmul * 4); \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_vm; \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 4, insn.rs2(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 4, insn.rs2(), P.VU.vflmul); \ + } \ + if (is_vs1) {\ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul * 4, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 4, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_DDS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require_align(insn.rs2(), P.VU.vflmul * 2); \ + if (is_rs) { \ + require_align(insn.rs1(), P.VU.vflmul); \ + if (P.VU.vflmul < 1) {\ + 
require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } \ + } + +#define VI_CHECK_SDS(is_vs1) \ + VI_NARROW_CHECK_COMMON; \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \ + if (is_vs1) \ + require_align(insn.rs1(), P.VU.vflmul); \ + +#define VI_CHECK_REDUCTION(is_wide) \ + require_vector(true);\ + if (is_wide) {\ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + } \ + require_align(insn.rs2(), P.VU.vflmul); \ + require(P.VU.vstart == 0); \ + +#define VI_CHECK_SLIDE(is_over) \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vm; \ + if (is_over) \ + require(insn.rd() != insn.rs2()); \ + + +// +// vector: loop header and end helper +// +#define VI_GENERAL_LOOP_BASE \ + require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i 0) { \ + vd_0_des = vd_0_res; \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_CMP_BASE \ + require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i(insn.rd(), midx, true); \ + uint64_t res = 0; + +#define VI_LOOP_CMP_END \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_MASK(op) \ + require(P.VU.vsew <= e64); \ + require_vector(true);\ + reg_t vl = P.VU.vl; \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + int midx = i / 64; \ + int mpos = i % 64; \ + uint64_t mmask = UINT64_C(1) << mpos; \ + uint64_t vs2 = P.VU.elt(insn.rs2(), midx); \ + uint64_t vs1 = P.VU.elt(insn.rs1(), midx); \ + uint64_t &res = 
P.VU.elt(insn.rd(), midx, true); \ + res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_NSHIFT_BASE \ + VI_GENERAL_LOOP_BASE; \ + VI_LOOP_ELEMENT_SKIP({\ + require(!(insn.rd() == 0 && P.VU.vflmul > 1));\ + }); + + +#define INT_ROUNDING(result, xrm, gb) \ + do { \ + const uint64_t lsb = 1UL << (gb); \ + const uint64_t lsb_half = lsb >> 1; \ + switch (xrm) {\ + case VRM::RNU:\ + result += lsb_half; \ + break;\ + case VRM::RNE:\ + if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb))) \ + result += lsb; \ + break;\ + case VRM::RDN:\ + break;\ + case VRM::ROD:\ + if (result & (lsb - 1)) \ + result |= lsb; \ + break;\ + case VRM::INVALID_RM:\ + assert(true);\ + } \ + } while (0) + +// +// vector: integer and masking operand access helper +// +#define VXI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); + +#define VV_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type rs1 = (type_usew_t::type)RS1; \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_U_PARAMS(x) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, 
i, true); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define XV_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, RS1); + +#define VV_UCMP_PARAMS(x) \ + type_usew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_UCMP_PARAMS(x) \ + type_usew_t::type rs1 = (type_usew_t::type)RS1; \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_UCMP_PARAMS(x) \ + type_usew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VV_CMP_PARAMS(x) \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VX_CMP_PARAMS(x) \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_CMP_PARAMS(x) \ + type_sew_t::type simm5 = (type_sew_t::type)insn.v_simm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + +#define VI_XI_SLIDEDOWN_PARAMS(x, off) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2 = P.VU.elt::type>(rs2_num, i + off); + +#define VI_XI_SLIDEUP_PARAMS(x, offset) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2 = P.VU.elt::type>(rs2_num, i - offset); + +#define VI_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto zimm5 = (type_usew_t::type)insn.v_zimm5(); + +#define VX_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; + +#define 
VV_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt::type>(rd_num, i, true); \ + auto vs2_u = P.VU.elt::type>(rs2_num, i); \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); + +#define XI_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + auto simm5 = (type_sew_t::type)insn.v_simm5(); \ + auto &vd = P.VU.elt(rd_num, midx, true); + +#define VV_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto &vd = P.VU.elt(rd_num, midx, true); + +#define XI_WITH_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto rs1 = (type_sew_t::type)RS1; \ + auto simm5 = (type_sew_t::type)insn.v_simm5(); \ + auto &vd = P.VU.elt::type>(rd_num, i, true); + +#define VV_WITH_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt::type>(rs2_num, i); \ + auto vs1 = P.VU.elt::type>(rs1_num, i); \ + auto &vd = P.VU.elt::type>(rd_num, i, true); + +// +// vector: integer and masking operation loop +// + +// comparision result to masking register +#define VI_VV_LOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_CMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_CMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_CMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_CMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_CMP_PARAMS(e32); \ + BODY; \ + 
}else if(sew == e64){ \ + VI_CMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VV_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_UCMP_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_UCMP_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_UCMP_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_UCMP_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +// merge and copy loop +#define VI_VVXI_MERGE_LOOP(BODY) \ + VI_GENERAL_LOOP_BASE \ + if (sew == e8){ \ + VXI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VXI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VXI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VXI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// reduction loop - signed +#define VI_LOOP_REDUCTION_BASE(x) \ + require(x >= e8 && x <= e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); \ + +#define REDUCTION_LOOP(x, BODY) \ + VI_LOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_LOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ + reg_t sew = 
P.VU.vsew; \ + if (sew == e8) { \ + REDUCTION_LOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_LOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_LOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_LOOP(e64, BODY) \ + } + +// reduction loop - unsgied +#define VI_ULOOP_REDUCTION_BASE(x) \ + require(x >= e8 && x <= e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define REDUCTION_ULOOP(x, BODY) \ + VI_ULOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_ULOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + REDUCTION_ULOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_ULOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_ULOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_ULOOP(e64, BODY) \ + } + + +// genearl VXI signed/unsgied loop +#define VI_VV_ULOOP(BODY) \ + VI_CHECK_SSS(true) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP(BODY) \ + VI_CHECK_SSS(true) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ 
+ BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// narrow operation loop +#define VI_VV_LOOP_NARROW(BODY) \ +VI_NARROW_CHECK_COMMON; \ +VI_LOOP_BASE \ +if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ +}else if(sew == e16){ \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ +}else if(sew == e32){ \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ +} \ +VI_LOOP_END + +#define VI_NARROW_SHIFT(sew1, sew2) \ + type_usew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_usew_t::type vs2_u = P.VU.elt::type>(rs2_num, i); \ + type_usew_t::type zimm5 = (type_usew_t::type)insn.v_zimm5(); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); \ + type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ + type_sew_t::type rs1 = (type_sew_t::type)RS1; + +#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define 
VI_VI_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VI_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VX_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VX_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VX_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VV_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VV_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VV_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +// widen operation loop +#define VI_VV_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##16_t)(sign##8_t)var0 op0 (sign##16_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##32_t)(sign##16_t)var0 op0 (sign##32_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + 
op1((sign##64_t)(sign##32_t)var0 op0 (sign##64_t)(sign##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##16_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##16_t)(sign_1##8_t)var0 op0 (sign_2##16_t)(sign_2##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign_d##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##32_t)(sign_1##16_t)var0 op0 (sign_2##32_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##64_t)(sign_1##32_t)var0 op0 (sign_2##64_t)(sign_2##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_WVX_OP(var0, op0, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##16_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##16_t)(sign##8_t)var0; \ + } \ + break; \ + case e16: { \ + sign##32_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##32_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##32_t)(sign##16_t)var0; \ + } \ + break; \ + default: { \ + sign##64_t &vd_w = P.VU.elt(rd_num, i, true); \ + sign##64_t vs2_w = P.VU.elt(rs2_num, i); \ + vd_w = vs2_w op0 (sign##64_t)(sign##32_t)var0; \ + } \ + break; \ + } + +// quad operation loop +#define VI_VV_LOOP_QUAD(BODY) \ + VI_CHECK_QSS(true); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_QUAD(BODY) \ + VI_CHECK_QSS(false); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_QUAD_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##32_t vd_w = P.VU.elt(rd_num, i); \ + 
P.VU.elt(rd_num, i, true) = \ + op1((sign##32_t)(sign##8_t)var0 op0 (sign##32_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign##64_t)(sign##16_t)var0 op0 (sign##64_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_QUAD_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##32_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##32_t)(sign_1##8_t)var0 op0 (sign_2##32_t)(sign_2##8_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op1((sign_1##64_t)(sign_1##16_t)var0 op0 (sign_2##64_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + } + +// wide reduction loop - signed +#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define WIDE_REDUCTION_LOOP(sew1, sew2, BODY) \ + VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(true); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_LOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_LOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_LOOP(e32, e64, BODY) \ + } + +// wide reduction loop - unsigned +#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt::type>(rd_num, 0, true); \ + auto vd_0_res = P.VU.elt::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i::type>(rs2_num, i); + +#define 
WIDE_REDUCTION_ULOOP(sew1, sew2, BODY) \ + VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(true); \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_ULOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_ULOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_ULOOP(e32, e64, BODY) \ + } + +// carry/borrow bit loop +#define VI_VV_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(true); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + VV_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_XI_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(false); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + XI_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + XI_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_WITH_CARRY(BODY) \ + require(insn.rd() != 0); \ + VI_CHECK_SSS(true); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + VV_WITH_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_WITH_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_WITH_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_WITH_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_XI_LOOP_WITH_CARRY(BODY) \ + require(insn.rd() != 0); \ + VI_CHECK_SSS(false); \ + VI_GENERAL_LOOP_BASE \ + VI_MASK_VARS \ + if (sew == e8){ \ + XI_WITH_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_WITH_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_WITH_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + 
XI_WITH_CARRY_PARAMS(e64) \ + BODY; \ + } \ + VI_LOOP_END + +// average loop +#define VI_VVX_LOOP_AVG(opd, op, is_vs1) \ +VI_CHECK_SSS(is_vs1); \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + switch(sew) { \ + case e8: { \ + VV_PARAMS(e8); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_PARAMS(e16); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_PARAMS(e32); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_PARAMS(e64); \ + type_sew_t::type rs1 = RS1; \ + auto res = (int128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END + +#define VI_VVX_ULOOP_AVG(opd, op, is_vs1) \ +VI_CHECK_SSS(is_vs1); \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + switch(sew) { \ + case e8: { \ + VV_U_PARAMS(e8); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint16_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_U_PARAMS(e16); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_U_PARAMS(e32); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_U_PARAMS(e64); \ + type_usew_t::type rs1 = RS1; \ + auto res = (uint128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END + +// +// vector: load/store helper +// +#define VI_STRIP(inx) \ + reg_t vreg_inx = inx; + +#define VI_DUPLICATE_VREG(reg_num, idx_sew) \ +reg_t index[P.VU.vlmax]; \ +for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; 
++i) { \ + switch(idx_sew) { \ + case e8: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e16: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e32: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + case e64: \ + index[i] = P.VU.elt(reg_num, i); \ + break; \ + } \ +} + +#define VI_LD(stride, offset, elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + VI_CHECK_LOAD(elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + elt_width##_t val = MMU.load_##elt_width( \ + baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \ + P.VU.elt(vd + fn * emul, vreg_inx, true) = val; \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LD_INDEX(elt_width, is_seg) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + if (!is_seg) \ + require(nf == 1); \ + VI_CHECK_LD_INDEX(elt_width); \ + VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + switch(P.VU.vsew){ \ + case e8: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint8(baseAddr + index[i] + fn * 1); \ + break; \ + case e16: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint16(baseAddr + index[i] + fn * 2); \ + break; \ + case e32: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint32(baseAddr + index[i] + fn * 4); \ + break; \ + default: \ + P.VU.elt(vd + fn * flmul, vreg_inx, true) = \ + MMU.load_uint64(baseAddr + index[i] + fn * 8); \ + break; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_ST(stride, offset, elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + 
VI_CHECK_STORE(elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + elt_width##_t val = P.VU.elt(vs3 + fn * emul, vreg_inx); \ + MMU.store_##elt_width( \ + baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_ST_INDEX(elt_width, is_seg) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + if (!is_seg) \ + require(nf == 1); \ + VI_CHECK_ST_INDEX(elt_width); \ + VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ + for (reg_t i = 0; i < vl; ++i) { \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + P.VU.vstart = i; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + switch (P.VU.vsew) { \ + case e8: \ + MMU.store_uint8(baseAddr + index[i] + fn * 1, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + case e16: \ + MMU.store_uint16(baseAddr + index[i] + fn * 2, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + case e32: \ + MMU.store_uint32(baseAddr + index[i] + fn * 4, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + default: \ + MMU.store_uint64(baseAddr + index[i] + fn * 8, \ + P.VU.elt(vs3 + fn * flmul, vreg_inx)); \ + break; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LDST_FF(elt_width) \ + const reg_t nf = insn.v_nf() + 1; \ + const reg_t sew = p->VU.vsew; \ + const reg_t vl = p->VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t rd_num = insn.rd(); \ + VI_CHECK_LOAD(elt_width); \ + bool early_stop = false; \ + for (reg_t i = p->VU.vstart; i < vl; ++i) { \ + VI_STRIP(i); \ + VI_ELEMENT_SKIP(i); \ + \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + uint64_t val; \ + try { \ + val = MMU.load_##elt_width( \ + baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \ + } catch (trap_t& t) { \ + if (i == 0) \ + throw; /* Only take exception on zeroth element */ \ + /* Reduce VL if an exception occurs on a later element */ \ + 
early_stop = true; \ + P.VU.vl = i; \ + break; \ + } \ + p->VU.elt(rd_num + fn * emul, vreg_inx, true) = val; \ + } \ + \ + if (early_stop) { \ + break; \ + } \ + } \ + p->VU.vstart = 0; + +#define VI_LD_WHOLE(elt_width) \ + require_vector_novtype(true, false); \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + const reg_t len = insn.v_nf() + 1; \ + require_align(vd, len); \ + const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \ + const reg_t size = len * elt_per_reg; \ + if (P.VU.vstart < size) { \ + reg_t i = P.VU.vstart / elt_per_reg; \ + reg_t off = P.VU.vstart % elt_per_reg; \ + if (off) { \ + for (reg_t pos = off; pos < elt_per_reg; ++pos) { \ + auto val = MMU.load_## elt_width(baseAddr + \ + P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.elt(vd + i, pos, true) = val; \ + P.VU.vstart++; \ + } \ + ++i; \ + } \ + for (; i < len; ++i) { \ + for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \ + auto val = MMU.load_## elt_width(baseAddr + \ + P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.elt(vd + i, pos, true) = val; \ + P.VU.vstart++; \ + } \ + } \ + } \ + P.VU.vstart = 0; \ + +#define VI_ST_WHOLE \ + require_vector_novtype(true, false); \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + const reg_t len = insn.v_nf() + 1; \ + require_align(vs3, len); \ + const reg_t size = len * P.VU.vlenb; \ + \ + if (P.VU.vstart < size) { \ + reg_t i = P.VU.vstart / P.VU.vlenb; \ + reg_t off = P.VU.vstart % P.VU.vlenb; \ + if (off) { \ + for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \ + auto val = P.VU.elt(vs3 + i, pos); \ + MMU.store_uint8(baseAddr + P.VU.vstart, val); \ + P.VU.vstart++; \ + } \ + i++; \ + } \ + for (; i < len; ++i) { \ + for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \ + auto val = P.VU.elt(vs3 + i, pos); \ + MMU.store_uint8(baseAddr + P.VU.vstart, val); \ + P.VU.vstart++; \ + } \ + } \ + } \ + P.VU.vstart = 0; + +// +// vector: amo +// +#define VI_AMO(op, type, idx_type) \ + require_vector(false); \ + 
require_align(insn.rd(), P.VU.vflmul); \ + require(P.VU.vsew <= P.get_xlen() && P.VU.vsew >= 32); \ + require_align(insn.rd(), P.VU.vflmul); \ + float vemul = ((float)idx_type / P.VU.vsew * P.VU.vflmul); \ + require(vemul >= 0.125 && vemul <= 8); \ + require_align(insn.rs2(), vemul); \ + if (insn.v_wd()) {\ + require_vm; \ + if (idx_type > P.VU.vsew) { \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else if (idx_type < P.VU.vsew) { \ + if (vemul < 1) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), vemul); \ + } \ + } \ + } \ + VI_DUPLICATE_VREG(insn.rs2(), idx_type); \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + switch (P.VU.vsew) { \ + case e32: {\ + auto vs3 = P.VU.elt< type ## 32_t>(vd, vreg_inx); \ + auto val = MMU.amo_uint32(baseAddr + index[i], [&]( type ## 32_t lhs) { op }); \ + if (insn.v_wd()) \ + P.VU.elt< type ## 32_t>(vd, vreg_inx, true) = val; \ + } \ + break; \ + case e64: {\ + auto vs3 = P.VU.elt< type ## 64_t>(vd, vreg_inx); \ + auto val = MMU.amo_uint64(baseAddr + index[i], [&]( type ## 64_t lhs) { op }); \ + if (insn.v_wd()) \ + P.VU.elt< type ## 64_t>(vd, vreg_inx, true) = val; \ + } \ + break; \ + default: \ + require(0); \ + break; \ + } \ + } \ + P.VU.vstart = 0; + +// vector: sign/unsiged extension +#define VI_VV_EXT(div, type) \ + require(insn.rd() != insn.rs2()); \ + require_vm; \ + reg_t from = P.VU.vsew / div; \ + require(from >= e8 && from <= e64); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul / div); \ + if ((P.VU.vflmul / div) < 1) { \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ + } else {\ + require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ + } \ + reg_t pat = 
(((P.VU.vsew >> 3) << 4) | from >> 3); \ + VI_GENERAL_LOOP_BASE \ + VI_LOOP_ELEMENT_SKIP(); \ + switch (pat) { \ + case 0x21: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x41: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x81: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x42: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x82: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x84: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + case 0x88: \ + P.VU.elt(rd_num, i, true) = P.VU.elt(rs2_num, i); \ + break; \ + default: \ + break; \ + } \ + VI_LOOP_END + +// +// vector: vfp helper +// +#define VI_VFP_COMMON \ + require_fp; \ + require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e32 && p->supports_extension('F')) || \ + (P.VU.vsew == e64 && p->supports_extension('D'))); \ + require_vector(true);\ + require(STATE.frm < 0x5);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm; + +#define VI_VFP_LOOP_BASE \ + VI_VFP_COMMON \ + for (reg_t i=P.VU.vstart; i(rd_num, midx, true); \ + uint64_t res = 0; + +#define VI_VFP_LOOP_REDUCTION_BASE(width) \ + float##width##_t vd_0 = P.VU.elt(rd_num, 0); \ + float##width##_t vs1_0 = P.VU.elt(rs1_num, 0); \ + vd_0 = vs1_0; \ + bool is_active = false; \ + for (reg_t i=P.VU.vstart; i(rs2_num, i); \ + is_active = true; \ + +#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \ + VI_VFP_COMMON \ + float64_t vd_0 = f64(P.VU.elt(rs1_num, 0).v); \ + for (reg_t i=P.VU.vstart; i 0) { \ + if (is_propagate && !is_active) { \ + switch (x) { \ + case e16: {\ + auto ret = f16_classify(f16(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + P.VU.elt(rd_num, 0, true) = 
defaultNaNF16UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + case e32: { \ + auto ret = f32_classify(f32(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + P.VU.elt(rd_num, 0, true) = defaultNaNF32UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + case e64: {\ + auto ret = f64_classify(f64(vd_0.v)); \ + if (ret & 0x300) { \ + if (ret & 0x100) { \ + softfloat_exceptionFlags |= softfloat_flag_invalid; \ + set_fp_exceptions; \ + } \ + P.VU.elt(rd_num, 0, true) = defaultNaNF64UI; \ + } else { \ + P.VU.elt(rd_num, 0, true) = vd_0.v; \ + } \ + } \ + break; \ + } \ + } else { \ + P.VU.elt::type>(rd_num, 0, true) = vd_0.v; \ + } \ + } + +#define VI_VFP_LOOP_CMP_END \ + switch(P.VU.vsew) { \ + case e16: \ + case e32: \ + case e64: { \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + } \ + P.VU.vstart = 0; + +#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs1 = P.VU.elt(rs1_num, i); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs1 = P.VU.elt(rs1_num, i); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_V_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = 
P.VU.elt(rd_num, i, true); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + set_fp_exceptions; \ + VI_VFP_LOOP_END + +#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ + VI_CHECK_REDUCTION(false) \ + VI_VFP_COMMON \ + switch(P.VU.vsew) { \ + case e16: {\ + VI_VFP_LOOP_REDUCTION_BASE(16) \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e16) \ + break; \ + }\ + case e32: {\ + VI_VFP_LOOP_REDUCTION_BASE(32) \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e64: {\ + VI_VFP_LOOP_REDUCTION_BASE(64) \ + BODY64; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + +#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \ + VI_CHECK_REDUCTION(true) \ + VI_VFP_COMMON \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + bool is_active = false; \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e32: {\ + float64_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + +#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ + VI_CHECK_SSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + float16_t vs2 = 
P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t rs1 = f64(READ_FREG(rs1_num)); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VF; \ + VI_VFP_LOOP_END + +#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ + VI_CHECK_MSS(is_vs1); \ + VI_VFP_LOOP_CMP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = P.VU.elt(rs1_num, i); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t vs1 = P.VU.elt(rs1_num, i); \ + float64_t rs1 = f64(READ_FREG(rs1_num)); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + VI_VFP_LOOP_CMP_END \ + +#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DSS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + } \ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + 
VI_VFP_LOOP_END + + +#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DSS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ + float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DDS(false); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \ + VI_CHECK_DDS(true); \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + float64_t &vd = P.VU.elt(rd_num, i, true); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_LOOP_SCALE_BASE \ + require_fp; \ + require_vector(true);\ + require((P.VU.vsew == e8 && 
p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + require(STATE.frm < 0x5);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm; \ + for (reg_t i=P.VU.vstart; i bus_t::find_device(reg_t addr) it--; return std::make_pair(it->first, it->second); } + +// Type for holding all registered MMIO plugins by name. +using mmio_plugin_map_t = std::map; + +// Simple singleton instance of an mmio_plugin_map_t. +static mmio_plugin_map_t& mmio_plugin_map() +{ + static mmio_plugin_map_t instance; + return instance; +} + +void register_mmio_plugin(const char* name_cstr, + const mmio_plugin_t* mmio_plugin) +{ + std::string name(name_cstr); + if (!mmio_plugin_map().emplace(name, *mmio_plugin).second) { + throw std::runtime_error("Plugin \"" + name + "\" already registered!"); + } +} + +mmio_plugin_device_t::mmio_plugin_device_t(const std::string& name, + const std::string& args) + : plugin(mmio_plugin_map().at(name)), user_data((*plugin.alloc)(args.c_str())) +{ +} + +mmio_plugin_device_t::~mmio_plugin_device_t() +{ + (*plugin.dealloc)(user_data); +} + +bool mmio_plugin_device_t::load(reg_t addr, size_t len, uint8_t* bytes) +{ + return (*plugin.load)(user_data, addr, len, bytes); +} + +bool mmio_plugin_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) +{ + return (*plugin.store)(user_data, addr, len, bytes); +} diff --git a/riscv/devices.h b/riscv/devices.h index 4e4d27ff60..3dd6c66936 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -2,10 +2,12 @@ #define _RISCV_DEVICES_H #include "decode.h" +#include "mmio_plugin.h" #include #include #include #include +#include class processor_t; @@ -62,7 +64,7 @@ class mem_t : public abstract_device_t { class clint_t : public abstract_device_t { public: - clint_t(std::vector&); + clint_t(std::vector&, uint64_t freq_hz, bool 
real_time); bool load(reg_t addr, size_t len, uint8_t* bytes); bool store(reg_t addr, size_t len, const uint8_t* bytes); size_t size() { return CLINT_SIZE; } @@ -72,8 +74,25 @@ class clint_t : public abstract_device_t { typedef uint64_t mtimecmp_t; typedef uint32_t msip_t; std::vector& procs; + uint64_t freq_hz; + bool real_time; + uint64_t real_time_ref_secs; + uint64_t real_time_ref_usecs; mtime_t mtime; std::vector mtimecmp; }; +class mmio_plugin_device_t : public abstract_device_t { + public: + mmio_plugin_device_t(const std::string& name, const std::string& args); + virtual ~mmio_plugin_device_t() override; + + virtual bool load(reg_t addr, size_t len, uint8_t* bytes) override; + virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + + private: + mmio_plugin_t plugin; + void* user_data; +}; + #endif diff --git a/riscv/disasm.h b/riscv/disasm.h index 94e007a12d..88d0e9b3ad 100644 --- a/riscv/disasm.h +++ b/riscv/disasm.h @@ -10,6 +10,7 @@ extern const char* xpr_name[NXPR]; extern const char* fpr_name[NFPR]; +extern const char* vr_name[NVPR]; extern const char* csr_name(int which); class arg_t @@ -19,18 +20,31 @@ class arg_t virtual ~arg_t() {} }; +// Indicates that the next arg (only) is optional. +// If the result of converting the next arg to a string is "" +// then it will not be printed. 
+struct : public arg_t { + std::string to_string(insn_t insn) const { return ""; } +} opt; + class disasm_insn_t { public: - disasm_insn_t(const char* name, uint32_t match, uint32_t mask, - const std::vector& args) - : match(match), mask(mask), args(args), name(name) {} + NOINLINE disasm_insn_t(const char* name, uint32_t match, uint32_t mask, + const std::vector& args) + : match(match), mask(mask), args(args), name(strdup(name)) {} + ~disasm_insn_t() { free(const_cast(name)); } bool operator == (insn_t insn) const { return (insn.bits() & mask) == match; } + const char* get_name() const + { + return name; + } + std::string to_string(insn_t insn) const { std::stringstream s; @@ -40,10 +54,21 @@ class disasm_insn_t if (args.size()) { + bool next_arg_optional = false; s << std::string(std::max(1, 8 - len), ' '); - for (size_t i = 0; i < args.size()-1; i++) - s << args[i]->to_string(insn) << ", "; - s << args[args.size()-1]->to_string(insn); + for (size_t i = 0; i < args.size(); i++) { + if (args[i] == &opt) { + next_arg_optional = true; + continue; + } + std::string argString = args[i]->to_string(insn); + if (next_arg_optional) { + next_arg_optional = false; + if (argString.empty()) continue; + } + if (i != 0) s << ", "; + s << argString; + } } return s.str(); } @@ -63,12 +88,15 @@ class disassembler_t public: disassembler_t(int xlen); ~disassembler_t(); + std::string disassemble(insn_t insn) const; + const disasm_insn_t* lookup(insn_t insn) const; + void add_insn(disasm_insn_t* insn); + private: static const int HASH_SIZE = 256; std::vector chain[HASH_SIZE+1]; - const disasm_insn_t* lookup(insn_t insn) const; }; #endif diff --git a/riscv/dts.cc b/riscv/dts.cc index b8a5f9d7c1..56b76e6c50 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -1,6 +1,7 @@ // See LICENSE for license details. 
#include "dts.h" +#include "libfdt.h" #include #include #include @@ -9,6 +10,8 @@ #include std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, + reg_t initrd_start, reg_t initrd_end, + const char* bootargs, std::vector procs, std::vector> mems) { @@ -21,6 +24,25 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " #size-cells = <2>;\n" " compatible = \"ucbbar,spike-bare-dev\";\n" " model = \"ucbbar,spike-bare\";\n" + " chosen {\n"; + if (initrd_start < initrd_end) { + s << " linux,initrd-start = <" << (size_t)initrd_start << ">;\n" + " linux,initrd-end = <" << (size_t)initrd_end << ">;\n"; + if (!bootargs) + bootargs = "root=/dev/ram console=hvc0 earlycon=sbi"; + } else { + if (!bootargs) + bootargs = "console=hvc0 earlycon=sbi"; + } + s << " bootargs = \""; + for (size_t i = 0; i < strlen(bootargs); i++) { + if (bootargs[i] == '"') + s << '\\' << bootargs[i]; + else + s << bootargs[i]; + } + s << "\";\n"; + s << " };\n" " cpus {\n" " #address-cells = <1>;\n" " #size-cells = <0>;\n" @@ -33,6 +55,8 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " compatible = \"riscv\";\n" " riscv,isa = \"" << procs[i]->get_isa_string() << "\";\n" " mmu-type = \"riscv," << (procs[i]->get_max_xlen() <= 32 ? 
"sv32" : "sv48") << "\";\n" + " riscv,pmpregions = <16>;\n" + " riscv,pmpgranularity = <4>;\n" " clock-frequency = <" << cpu_hz << ">;\n" " CPU" << i << "_intc: interrupt-controller {\n" " #interrupt-cells = <1>;\n" @@ -47,7 +71,7 @@ std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, " memory@" << m.first << " {\n" " device_type = \"memory\";\n" " reg = <0x" << (m.first >> 32) << " 0x" << (m.first & (uint32_t)-1) << - " 0x" << (m.second->size() >> 32) << " 0x" << (m.second->size() & (uint32_t)-1) << ">;\n" + " 0x" << (m.second->size() >> 16 >> 16) << " 0x" << (m.second->size() & (uint32_t)-1) << ">;\n" " };\n"; } s << " soc {\n" @@ -80,6 +104,7 @@ std::string dts_compile(const std::string& dts) int dts_pipe[2]; pid_t dts_pid; + fflush(NULL); // flush stdout/stderr before forking if (pipe(dts_pipe) != 0 || (dts_pid = fork()) < 0) { std::cerr << "Failed to fork dts child: " << strerror(errno) << std::endl; exit(1); @@ -116,7 +141,7 @@ std::string dts_compile(const std::string& dts) close(dts_pipe[1]); close(dtb_pipe[0]); close(dtb_pipe[1]); - execl(DTC, DTC, "-O", "dtb", 0); + execlp(DTC, DTC, "-O", "dtb", 0); std::cerr << "Failed to run " DTC ": " << strerror(errno) << std::endl; exit(1); } @@ -154,3 +179,97 @@ std::string dts_compile(const std::string& dts) return dtb.str(); } + + +static int fdt_get_node_addr_size(void *fdt, int node, reg_t *addr, + unsigned long *size, const char *field) +{ + int parent, len, i; + int cell_addr, cell_size; + const fdt32_t *prop_addr, *prop_size; + uint64_t temp = 0; + + parent = fdt_parent_offset(fdt, node); + if (parent < 0) + return parent; + + cell_addr = fdt_address_cells(fdt, parent); + if (cell_addr < 1) + return -ENODEV; + + cell_size = fdt_size_cells(fdt, parent); + if (cell_size < 0) + return -ENODEV; + + if (!field) + return -ENODEV; + + prop_addr = (fdt32_t *)fdt_getprop(fdt, node, field, &len); + if (!prop_addr) + return -ENODEV; + prop_size = prop_addr + cell_addr; + + if (addr) { + for (i = 0; i < 
cell_addr; i++) + temp = (temp << 32) | fdt32_to_cpu(*prop_addr++); + *addr = temp; + } + temp = 0; + + if (size) { + for (i = 0; i < cell_size; i++) + temp = (temp << 32) | fdt32_to_cpu(*prop_size++); + *size = temp; + } + + return 0; +} + +int fdt_parse_clint(void *fdt, reg_t *clint_addr, + const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, clint_addr, NULL, "reg"); + if (rc < 0 || !clint_addr) + return -ENODEV; + + return 0; +} + +int fdt_parse_pmp_num(void *fdt, reg_t *pmp_num, const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, pmp_num, NULL, + "riscv,pmpregions"); + if (rc < 0 || !pmp_num) + return -ENODEV; + + return 0; +} + +int fdt_parse_pmp_alignment(void *fdt, reg_t *pmp_align, + const char *compatible) +{ + int nodeoffset, rc; + + nodeoffset = fdt_node_offset_by_compatible(fdt, -1, compatible); + if (nodeoffset < 0) + return nodeoffset; + + rc = fdt_get_node_addr_size(fdt, nodeoffset, pmp_align, NULL, + "riscv,pmpgranularity"); + if (rc < 0 || !pmp_align) + return -ENODEV; + + return 0; +} diff --git a/riscv/dts.h b/riscv/dts.h index ec0aa6161b..1f01e0f8c8 100644 --- a/riscv/dts.h +++ b/riscv/dts.h @@ -7,9 +7,17 @@ #include std::string make_dts(size_t insns_per_rtc_tick, size_t cpu_hz, + reg_t initrd_start, reg_t initrd_end, + const char* bootargs, std::vector procs, std::vector> mems); std::string dts_compile(const std::string& dts); +int fdt_parse_clint(void *fdt, reg_t *clint_addr, + const char *compatible); +int fdt_parse_pmp_num(void *fdt, reg_t *pmp_num, + const char *compatible); +int fdt_parse_pmp_alignment(void *fdt, reg_t *pmp_align, + const char *compatible); #endif diff --git a/riscv/encoding.h b/riscv/encoding.h deleted file mode 
100644 index c109ce189d..0000000000 --- a/riscv/encoding.h +++ /dev/null @@ -1,1471 +0,0 @@ -// See LICENSE for license details. - -#ifndef RISCV_CSR_ENCODING_H -#define RISCV_CSR_ENCODING_H - -#define MSTATUS_UIE 0x00000001 -#define MSTATUS_SIE 0x00000002 -#define MSTATUS_HIE 0x00000004 -#define MSTATUS_MIE 0x00000008 -#define MSTATUS_UPIE 0x00000010 -#define MSTATUS_SPIE 0x00000020 -#define MSTATUS_HPIE 0x00000040 -#define MSTATUS_MPIE 0x00000080 -#define MSTATUS_SPP 0x00000100 -#define MSTATUS_HPP 0x00000600 -#define MSTATUS_MPP 0x00001800 -#define MSTATUS_FS 0x00006000 -#define MSTATUS_XS 0x00018000 -#define MSTATUS_MPRV 0x00020000 -#define MSTATUS_SUM 0x00040000 -#define MSTATUS_MXR 0x00080000 -#define MSTATUS_TVM 0x00100000 -#define MSTATUS_TW 0x00200000 -#define MSTATUS_TSR 0x00400000 -#define MSTATUS32_SD 0x80000000 -#define MSTATUS_UXL 0x0000000300000000 -#define MSTATUS_SXL 0x0000000C00000000 -#define MSTATUS64_SD 0x8000000000000000 - -#define SSTATUS_UIE 0x00000001 -#define SSTATUS_SIE 0x00000002 -#define SSTATUS_UPIE 0x00000010 -#define SSTATUS_SPIE 0x00000020 -#define SSTATUS_SPP 0x00000100 -#define SSTATUS_FS 0x00006000 -#define SSTATUS_XS 0x00018000 -#define SSTATUS_SUM 0x00040000 -#define SSTATUS_MXR 0x00080000 -#define SSTATUS32_SD 0x80000000 -#define SSTATUS_UXL 0x0000000300000000 -#define SSTATUS64_SD 0x8000000000000000 - -#define DCSR_XDEBUGVER (3U<<30) -#define DCSR_NDRESET (1<<29) -#define DCSR_FULLRESET (1<<28) -#define DCSR_EBREAKM (1<<15) -#define DCSR_EBREAKH (1<<14) -#define DCSR_EBREAKS (1<<13) -#define DCSR_EBREAKU (1<<12) -#define DCSR_STOPCYCLE (1<<10) -#define DCSR_STOPTIME (1<<9) -#define DCSR_CAUSE (7<<6) -#define DCSR_DEBUGINT (1<<5) -#define DCSR_HALT (1<<3) -#define DCSR_STEP (1<<2) -#define DCSR_PRV (3<<0) - -#define DCSR_CAUSE_NONE 0 -#define DCSR_CAUSE_SWBP 1 -#define DCSR_CAUSE_HWBP 2 -#define DCSR_CAUSE_DEBUGINT 3 -#define DCSR_CAUSE_STEP 4 -#define DCSR_CAUSE_HALT 5 - -#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) 
-#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) -#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) - -#define MCONTROL_SELECT (1<<19) -#define MCONTROL_TIMING (1<<18) -#define MCONTROL_ACTION (0x3f<<12) -#define MCONTROL_CHAIN (1<<11) -#define MCONTROL_MATCH (0xf<<7) -#define MCONTROL_M (1<<6) -#define MCONTROL_H (1<<5) -#define MCONTROL_S (1<<4) -#define MCONTROL_U (1<<3) -#define MCONTROL_EXECUTE (1<<2) -#define MCONTROL_STORE (1<<1) -#define MCONTROL_LOAD (1<<0) - -#define MCONTROL_TYPE_NONE 0 -#define MCONTROL_TYPE_MATCH 2 - -#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 -#define MCONTROL_ACTION_DEBUG_MODE 1 -#define MCONTROL_ACTION_TRACE_START 2 -#define MCONTROL_ACTION_TRACE_STOP 3 -#define MCONTROL_ACTION_TRACE_EMIT 4 - -#define MCONTROL_MATCH_EQUAL 0 -#define MCONTROL_MATCH_NAPOT 1 -#define MCONTROL_MATCH_GE 2 -#define MCONTROL_MATCH_LT 3 -#define MCONTROL_MATCH_MASK_LOW 4 -#define MCONTROL_MATCH_MASK_HIGH 5 - -#define MIP_SSIP (1 << IRQ_S_SOFT) -#define MIP_HSIP (1 << IRQ_H_SOFT) -#define MIP_MSIP (1 << IRQ_M_SOFT) -#define MIP_STIP (1 << IRQ_S_TIMER) -#define MIP_HTIP (1 << IRQ_H_TIMER) -#define MIP_MTIP (1 << IRQ_M_TIMER) -#define MIP_SEIP (1 << IRQ_S_EXT) -#define MIP_HEIP (1 << IRQ_H_EXT) -#define MIP_MEIP (1 << IRQ_M_EXT) - -#define SIP_SSIP MIP_SSIP -#define SIP_STIP MIP_STIP - -#define PRV_U 0 -#define PRV_S 1 -#define PRV_H 2 -#define PRV_M 3 - -#define SATP32_MODE 0x80000000 -#define SATP32_ASID 0x7FC00000 -#define SATP32_PPN 0x003FFFFF -#define SATP64_MODE 0xF000000000000000 -#define SATP64_ASID 0x0FFFF00000000000 -#define SATP64_PPN 0x00000FFFFFFFFFFF - -#define SATP_MODE_OFF 0 -#define SATP_MODE_SV32 1 -#define SATP_MODE_SV39 8 -#define SATP_MODE_SV48 9 -#define SATP_MODE_SV57 10 -#define SATP_MODE_SV64 11 - -#define PMP_R 0x01 -#define PMP_W 0x02 -#define PMP_X 0x04 -#define PMP_A 0x18 -#define PMP_L 0x80 -#define PMP_SHIFT 2 - -#define PMP_TOR 0x08 -#define PMP_NA4 0x10 -#define PMP_NAPOT 0x18 - -#define IRQ_S_SOFT 1 -#define IRQ_H_SOFT 2 
-#define IRQ_M_SOFT 3 -#define IRQ_S_TIMER 5 -#define IRQ_H_TIMER 6 -#define IRQ_M_TIMER 7 -#define IRQ_S_EXT 9 -#define IRQ_H_EXT 10 -#define IRQ_M_EXT 11 -#define IRQ_COP 12 -#define IRQ_HOST 13 - -#define DEFAULT_RSTVEC 0x00001000 -#define CLINT_BASE 0x02000000 -#define CLINT_SIZE 0x000c0000 -#define EXT_IO_BASE 0x40000000 -#define DRAM_BASE 0x80000000 - -// page table entry (PTE) fields -#define PTE_V 0x001 // Valid -#define PTE_R 0x002 // Read -#define PTE_W 0x004 // Write -#define PTE_X 0x008 // Execute -#define PTE_U 0x010 // User -#define PTE_G 0x020 // Global -#define PTE_A 0x040 // Accessed -#define PTE_D 0x080 // Dirty -#define PTE_SOFT 0x300 // Reserved for Software - -#define PTE_PPN_SHIFT 10 - -#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) - -#ifdef __riscv - -#if __riscv_xlen == 64 -# define MSTATUS_SD MSTATUS64_SD -# define SSTATUS_SD SSTATUS64_SD -# define RISCV_PGLEVEL_BITS 9 -# define SATP_MODE SATP64_MODE -#else -# define MSTATUS_SD MSTATUS32_SD -# define SSTATUS_SD SSTATUS32_SD -# define RISCV_PGLEVEL_BITS 10 -# define SATP_MODE SATP32_MODE -#endif -#define RISCV_PGSHIFT 12 -#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) - -#ifndef __ASSEMBLER__ - -#ifdef __GNUC__ - -#define read_csr(reg) ({ unsigned long __tmp; \ - asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ - __tmp; }) - -#define write_csr(reg, val) ({ \ - asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) - -#define swap_csr(reg, val) ({ unsigned long __tmp; \ - asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ - __tmp; }) - -#define set_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define clear_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define rdtime() read_csr(time) -#define rdcycle() read_csr(cycle) -#define rdinstret() read_csr(instret) - -#endif - -#endif - -#endif - -#endif 
-/* Automatically generated by parse-opcodes. */ -#ifndef RISCV_ENCODING_H -#define RISCV_ENCODING_H -#define MATCH_BEQ 0x63 -#define MASK_BEQ 0x707f -#define MATCH_BNE 0x1063 -#define MASK_BNE 0x707f -#define MATCH_BLT 0x4063 -#define MASK_BLT 0x707f -#define MATCH_BGE 0x5063 -#define MASK_BGE 0x707f -#define MATCH_BLTU 0x6063 -#define MASK_BLTU 0x707f -#define MATCH_BGEU 0x7063 -#define MASK_BGEU 0x707f -#define MATCH_JALR 0x67 -#define MASK_JALR 0x707f -#define MATCH_JAL 0x6f -#define MASK_JAL 0x7f -#define MATCH_LUI 0x37 -#define MASK_LUI 0x7f -#define MATCH_AUIPC 0x17 -#define MASK_AUIPC 0x7f -#define MATCH_ADDI 0x13 -#define MASK_ADDI 0x707f -#define MATCH_SLLI 0x1013 -#define MASK_SLLI 0xfc00707f -#define MATCH_SLTI 0x2013 -#define MASK_SLTI 0x707f -#define MATCH_SLTIU 0x3013 -#define MASK_SLTIU 0x707f -#define MATCH_XORI 0x4013 -#define MASK_XORI 0x707f -#define MATCH_SRLI 0x5013 -#define MASK_SRLI 0xfc00707f -#define MATCH_SRAI 0x40005013 -#define MASK_SRAI 0xfc00707f -#define MATCH_ORI 0x6013 -#define MASK_ORI 0x707f -#define MATCH_ANDI 0x7013 -#define MASK_ANDI 0x707f -#define MATCH_ADD 0x33 -#define MASK_ADD 0xfe00707f -#define MATCH_SUB 0x40000033 -#define MASK_SUB 0xfe00707f -#define MATCH_SLL 0x1033 -#define MASK_SLL 0xfe00707f -#define MATCH_SLT 0x2033 -#define MASK_SLT 0xfe00707f -#define MATCH_SLTU 0x3033 -#define MASK_SLTU 0xfe00707f -#define MATCH_XOR 0x4033 -#define MASK_XOR 0xfe00707f -#define MATCH_SRL 0x5033 -#define MASK_SRL 0xfe00707f -#define MATCH_SRA 0x40005033 -#define MASK_SRA 0xfe00707f -#define MATCH_OR 0x6033 -#define MASK_OR 0xfe00707f -#define MATCH_AND 0x7033 -#define MASK_AND 0xfe00707f -#define MATCH_ADDIW 0x1b -#define MASK_ADDIW 0x707f -#define MATCH_SLLIW 0x101b -#define MASK_SLLIW 0xfe00707f -#define MATCH_SRLIW 0x501b -#define MASK_SRLIW 0xfe00707f -#define MATCH_SRAIW 0x4000501b -#define MASK_SRAIW 0xfe00707f -#define MATCH_ADDW 0x3b -#define MASK_ADDW 0xfe00707f -#define MATCH_SUBW 0x4000003b -#define MASK_SUBW 
0xfe00707f -#define MATCH_SLLW 0x103b -#define MASK_SLLW 0xfe00707f -#define MATCH_SRLW 0x503b -#define MASK_SRLW 0xfe00707f -#define MATCH_SRAW 0x4000503b -#define MASK_SRAW 0xfe00707f -#define MATCH_LB 0x3 -#define MASK_LB 0x707f -#define MATCH_LH 0x1003 -#define MASK_LH 0x707f -#define MATCH_LW 0x2003 -#define MASK_LW 0x707f -#define MATCH_LD 0x3003 -#define MASK_LD 0x707f -#define MATCH_LBU 0x4003 -#define MASK_LBU 0x707f -#define MATCH_LHU 0x5003 -#define MASK_LHU 0x707f -#define MATCH_LWU 0x6003 -#define MASK_LWU 0x707f -#define MATCH_SB 0x23 -#define MASK_SB 0x707f -#define MATCH_SH 0x1023 -#define MASK_SH 0x707f -#define MATCH_SW 0x2023 -#define MASK_SW 0x707f -#define MATCH_SD 0x3023 -#define MASK_SD 0x707f -#define MATCH_FENCE 0xf -#define MASK_FENCE 0x707f -#define MATCH_FENCE_I 0x100f -#define MASK_FENCE_I 0x707f -#define MATCH_MUL 0x2000033 -#define MASK_MUL 0xfe00707f -#define MATCH_MULH 0x2001033 -#define MASK_MULH 0xfe00707f -#define MATCH_MULHSU 0x2002033 -#define MASK_MULHSU 0xfe00707f -#define MATCH_MULHU 0x2003033 -#define MASK_MULHU 0xfe00707f -#define MATCH_DIV 0x2004033 -#define MASK_DIV 0xfe00707f -#define MATCH_DIVU 0x2005033 -#define MASK_DIVU 0xfe00707f -#define MATCH_REM 0x2006033 -#define MASK_REM 0xfe00707f -#define MATCH_REMU 0x2007033 -#define MASK_REMU 0xfe00707f -#define MATCH_MULW 0x200003b -#define MASK_MULW 0xfe00707f -#define MATCH_DIVW 0x200403b -#define MASK_DIVW 0xfe00707f -#define MATCH_DIVUW 0x200503b -#define MASK_DIVUW 0xfe00707f -#define MATCH_REMW 0x200603b -#define MASK_REMW 0xfe00707f -#define MATCH_REMUW 0x200703b -#define MASK_REMUW 0xfe00707f -#define MATCH_AMOADD_W 0x202f -#define MASK_AMOADD_W 0xf800707f -#define MATCH_AMOXOR_W 0x2000202f -#define MASK_AMOXOR_W 0xf800707f -#define MATCH_AMOOR_W 0x4000202f -#define MASK_AMOOR_W 0xf800707f -#define MATCH_AMOAND_W 0x6000202f -#define MASK_AMOAND_W 0xf800707f -#define MATCH_AMOMIN_W 0x8000202f -#define MASK_AMOMIN_W 0xf800707f -#define MATCH_AMOMAX_W 0xa000202f 
-#define MASK_AMOMAX_W 0xf800707f -#define MATCH_AMOMINU_W 0xc000202f -#define MASK_AMOMINU_W 0xf800707f -#define MATCH_AMOMAXU_W 0xe000202f -#define MASK_AMOMAXU_W 0xf800707f -#define MATCH_AMOSWAP_W 0x800202f -#define MASK_AMOSWAP_W 0xf800707f -#define MATCH_LR_W 0x1000202f -#define MASK_LR_W 0xf9f0707f -#define MATCH_SC_W 0x1800202f -#define MASK_SC_W 0xf800707f -#define MATCH_AMOADD_D 0x302f -#define MASK_AMOADD_D 0xf800707f -#define MATCH_AMOXOR_D 0x2000302f -#define MASK_AMOXOR_D 0xf800707f -#define MATCH_AMOOR_D 0x4000302f -#define MASK_AMOOR_D 0xf800707f -#define MATCH_AMOAND_D 0x6000302f -#define MASK_AMOAND_D 0xf800707f -#define MATCH_AMOMIN_D 0x8000302f -#define MASK_AMOMIN_D 0xf800707f -#define MATCH_AMOMAX_D 0xa000302f -#define MASK_AMOMAX_D 0xf800707f -#define MATCH_AMOMINU_D 0xc000302f -#define MASK_AMOMINU_D 0xf800707f -#define MATCH_AMOMAXU_D 0xe000302f -#define MASK_AMOMAXU_D 0xf800707f -#define MATCH_AMOSWAP_D 0x800302f -#define MASK_AMOSWAP_D 0xf800707f -#define MATCH_LR_D 0x1000302f -#define MASK_LR_D 0xf9f0707f -#define MATCH_SC_D 0x1800302f -#define MASK_SC_D 0xf800707f -#define MATCH_ECALL 0x73 -#define MASK_ECALL 0xffffffff -#define MATCH_EBREAK 0x100073 -#define MASK_EBREAK 0xffffffff -#define MATCH_URET 0x200073 -#define MASK_URET 0xffffffff -#define MATCH_SRET 0x10200073 -#define MASK_SRET 0xffffffff -#define MATCH_MRET 0x30200073 -#define MASK_MRET 0xffffffff -#define MATCH_DRET 0x7b200073 -#define MASK_DRET 0xffffffff -#define MATCH_SFENCE_VMA 0x12000073 -#define MASK_SFENCE_VMA 0xfe007fff -#define MATCH_WFI 0x10500073 -#define MASK_WFI 0xffffffff -#define MATCH_CSRRW 0x1073 -#define MASK_CSRRW 0x707f -#define MATCH_CSRRS 0x2073 -#define MASK_CSRRS 0x707f -#define MATCH_CSRRC 0x3073 -#define MASK_CSRRC 0x707f -#define MATCH_CSRRWI 0x5073 -#define MASK_CSRRWI 0x707f -#define MATCH_CSRRSI 0x6073 -#define MASK_CSRRSI 0x707f -#define MATCH_CSRRCI 0x7073 -#define MASK_CSRRCI 0x707f -#define MATCH_FADD_S 0x53 -#define MASK_FADD_S 0xfe00007f 
-#define MATCH_FSUB_S 0x8000053 -#define MASK_FSUB_S 0xfe00007f -#define MATCH_FMUL_S 0x10000053 -#define MASK_FMUL_S 0xfe00007f -#define MATCH_FDIV_S 0x18000053 -#define MASK_FDIV_S 0xfe00007f -#define MATCH_FSGNJ_S 0x20000053 -#define MASK_FSGNJ_S 0xfe00707f -#define MATCH_FSGNJN_S 0x20001053 -#define MASK_FSGNJN_S 0xfe00707f -#define MATCH_FSGNJX_S 0x20002053 -#define MASK_FSGNJX_S 0xfe00707f -#define MATCH_FMIN_S 0x28000053 -#define MASK_FMIN_S 0xfe00707f -#define MATCH_FMAX_S 0x28001053 -#define MASK_FMAX_S 0xfe00707f -#define MATCH_FSQRT_S 0x58000053 -#define MASK_FSQRT_S 0xfff0007f -#define MATCH_FADD_D 0x2000053 -#define MASK_FADD_D 0xfe00007f -#define MATCH_FSUB_D 0xa000053 -#define MASK_FSUB_D 0xfe00007f -#define MATCH_FMUL_D 0x12000053 -#define MASK_FMUL_D 0xfe00007f -#define MATCH_FDIV_D 0x1a000053 -#define MASK_FDIV_D 0xfe00007f -#define MATCH_FSGNJ_D 0x22000053 -#define MASK_FSGNJ_D 0xfe00707f -#define MATCH_FSGNJN_D 0x22001053 -#define MASK_FSGNJN_D 0xfe00707f -#define MATCH_FSGNJX_D 0x22002053 -#define MASK_FSGNJX_D 0xfe00707f -#define MATCH_FMIN_D 0x2a000053 -#define MASK_FMIN_D 0xfe00707f -#define MATCH_FMAX_D 0x2a001053 -#define MASK_FMAX_D 0xfe00707f -#define MATCH_FCVT_S_D 0x40100053 -#define MASK_FCVT_S_D 0xfff0007f -#define MATCH_FCVT_D_S 0x42000053 -#define MASK_FCVT_D_S 0xfff0007f -#define MATCH_FSQRT_D 0x5a000053 -#define MASK_FSQRT_D 0xfff0007f -#define MATCH_FADD_Q 0x6000053 -#define MASK_FADD_Q 0xfe00007f -#define MATCH_FSUB_Q 0xe000053 -#define MASK_FSUB_Q 0xfe00007f -#define MATCH_FMUL_Q 0x16000053 -#define MASK_FMUL_Q 0xfe00007f -#define MATCH_FDIV_Q 0x1e000053 -#define MASK_FDIV_Q 0xfe00007f -#define MATCH_FSGNJ_Q 0x26000053 -#define MASK_FSGNJ_Q 0xfe00707f -#define MATCH_FSGNJN_Q 0x26001053 -#define MASK_FSGNJN_Q 0xfe00707f -#define MATCH_FSGNJX_Q 0x26002053 -#define MASK_FSGNJX_Q 0xfe00707f -#define MATCH_FMIN_Q 0x2e000053 -#define MASK_FMIN_Q 0xfe00707f -#define MATCH_FMAX_Q 0x2e001053 -#define MASK_FMAX_Q 0xfe00707f -#define 
MATCH_FCVT_S_Q 0x40300053 -#define MASK_FCVT_S_Q 0xfff0007f -#define MATCH_FCVT_Q_S 0x46000053 -#define MASK_FCVT_Q_S 0xfff0007f -#define MATCH_FCVT_D_Q 0x42300053 -#define MASK_FCVT_D_Q 0xfff0007f -#define MATCH_FCVT_Q_D 0x46100053 -#define MASK_FCVT_Q_D 0xfff0007f -#define MATCH_FSQRT_Q 0x5e000053 -#define MASK_FSQRT_Q 0xfff0007f -#define MATCH_FLE_S 0xa0000053 -#define MASK_FLE_S 0xfe00707f -#define MATCH_FLT_S 0xa0001053 -#define MASK_FLT_S 0xfe00707f -#define MATCH_FEQ_S 0xa0002053 -#define MASK_FEQ_S 0xfe00707f -#define MATCH_FLE_D 0xa2000053 -#define MASK_FLE_D 0xfe00707f -#define MATCH_FLT_D 0xa2001053 -#define MASK_FLT_D 0xfe00707f -#define MATCH_FEQ_D 0xa2002053 -#define MASK_FEQ_D 0xfe00707f -#define MATCH_FLE_Q 0xa6000053 -#define MASK_FLE_Q 0xfe00707f -#define MATCH_FLT_Q 0xa6001053 -#define MASK_FLT_Q 0xfe00707f -#define MATCH_FEQ_Q 0xa6002053 -#define MASK_FEQ_Q 0xfe00707f -#define MATCH_FCVT_W_S 0xc0000053 -#define MASK_FCVT_W_S 0xfff0007f -#define MATCH_FCVT_WU_S 0xc0100053 -#define MASK_FCVT_WU_S 0xfff0007f -#define MATCH_FCVT_L_S 0xc0200053 -#define MASK_FCVT_L_S 0xfff0007f -#define MATCH_FCVT_LU_S 0xc0300053 -#define MASK_FCVT_LU_S 0xfff0007f -#define MATCH_FMV_X_W 0xe0000053 -#define MASK_FMV_X_W 0xfff0707f -#define MATCH_FCLASS_S 0xe0001053 -#define MASK_FCLASS_S 0xfff0707f -#define MATCH_FCVT_W_D 0xc2000053 -#define MASK_FCVT_W_D 0xfff0007f -#define MATCH_FCVT_WU_D 0xc2100053 -#define MASK_FCVT_WU_D 0xfff0007f -#define MATCH_FCVT_L_D 0xc2200053 -#define MASK_FCVT_L_D 0xfff0007f -#define MATCH_FCVT_LU_D 0xc2300053 -#define MASK_FCVT_LU_D 0xfff0007f -#define MATCH_FMV_X_D 0xe2000053 -#define MASK_FMV_X_D 0xfff0707f -#define MATCH_FCLASS_D 0xe2001053 -#define MASK_FCLASS_D 0xfff0707f -#define MATCH_FCVT_W_Q 0xc6000053 -#define MASK_FCVT_W_Q 0xfff0007f -#define MATCH_FCVT_WU_Q 0xc6100053 -#define MASK_FCVT_WU_Q 0xfff0007f -#define MATCH_FCVT_L_Q 0xc6200053 -#define MASK_FCVT_L_Q 0xfff0007f -#define MATCH_FCVT_LU_Q 0xc6300053 -#define 
MASK_FCVT_LU_Q 0xfff0007f -#define MATCH_FMV_X_Q 0xe6000053 -#define MASK_FMV_X_Q 0xfff0707f -#define MATCH_FCLASS_Q 0xe6001053 -#define MASK_FCLASS_Q 0xfff0707f -#define MATCH_FCVT_S_W 0xd0000053 -#define MASK_FCVT_S_W 0xfff0007f -#define MATCH_FCVT_S_WU 0xd0100053 -#define MASK_FCVT_S_WU 0xfff0007f -#define MATCH_FCVT_S_L 0xd0200053 -#define MASK_FCVT_S_L 0xfff0007f -#define MATCH_FCVT_S_LU 0xd0300053 -#define MASK_FCVT_S_LU 0xfff0007f -#define MATCH_FMV_W_X 0xf0000053 -#define MASK_FMV_W_X 0xfff0707f -#define MATCH_FCVT_D_W 0xd2000053 -#define MASK_FCVT_D_W 0xfff0007f -#define MATCH_FCVT_D_WU 0xd2100053 -#define MASK_FCVT_D_WU 0xfff0007f -#define MATCH_FCVT_D_L 0xd2200053 -#define MASK_FCVT_D_L 0xfff0007f -#define MATCH_FCVT_D_LU 0xd2300053 -#define MASK_FCVT_D_LU 0xfff0007f -#define MATCH_FMV_D_X 0xf2000053 -#define MASK_FMV_D_X 0xfff0707f -#define MATCH_FCVT_Q_W 0xd6000053 -#define MASK_FCVT_Q_W 0xfff0007f -#define MATCH_FCVT_Q_WU 0xd6100053 -#define MASK_FCVT_Q_WU 0xfff0007f -#define MATCH_FCVT_Q_L 0xd6200053 -#define MASK_FCVT_Q_L 0xfff0007f -#define MATCH_FCVT_Q_LU 0xd6300053 -#define MASK_FCVT_Q_LU 0xfff0007f -#define MATCH_FMV_Q_X 0xf6000053 -#define MASK_FMV_Q_X 0xfff0707f -#define MATCH_FLW 0x2007 -#define MASK_FLW 0x707f -#define MATCH_FLD 0x3007 -#define MASK_FLD 0x707f -#define MATCH_FLQ 0x4007 -#define MASK_FLQ 0x707f -#define MATCH_FSW 0x2027 -#define MASK_FSW 0x707f -#define MATCH_FSD 0x3027 -#define MASK_FSD 0x707f -#define MATCH_FSQ 0x4027 -#define MASK_FSQ 0x707f -#define MATCH_FMADD_S 0x43 -#define MASK_FMADD_S 0x600007f -#define MATCH_FMSUB_S 0x47 -#define MASK_FMSUB_S 0x600007f -#define MATCH_FNMSUB_S 0x4b -#define MASK_FNMSUB_S 0x600007f -#define MATCH_FNMADD_S 0x4f -#define MASK_FNMADD_S 0x600007f -#define MATCH_FMADD_D 0x2000043 -#define MASK_FMADD_D 0x600007f -#define MATCH_FMSUB_D 0x2000047 -#define MASK_FMSUB_D 0x600007f -#define MATCH_FNMSUB_D 0x200004b -#define MASK_FNMSUB_D 0x600007f -#define MATCH_FNMADD_D 0x200004f -#define 
MASK_FNMADD_D 0x600007f -#define MATCH_FMADD_Q 0x6000043 -#define MASK_FMADD_Q 0x600007f -#define MATCH_FMSUB_Q 0x6000047 -#define MASK_FMSUB_Q 0x600007f -#define MATCH_FNMSUB_Q 0x600004b -#define MASK_FNMSUB_Q 0x600007f -#define MATCH_FNMADD_Q 0x600004f -#define MASK_FNMADD_Q 0x600007f -#define MATCH_C_NOP 0x1 -#define MASK_C_NOP 0xffff -#define MATCH_C_ADDI16SP 0x6101 -#define MASK_C_ADDI16SP 0xef83 -#define MATCH_C_JR 0x8002 -#define MASK_C_JR 0xf07f -#define MATCH_C_JALR 0x9002 -#define MASK_C_JALR 0xf07f -#define MATCH_C_EBREAK 0x9002 -#define MASK_C_EBREAK 0xffff -#define MATCH_C_LD 0x6000 -#define MASK_C_LD 0xe003 -#define MATCH_C_SD 0xe000 -#define MASK_C_SD 0xe003 -#define MATCH_C_ADDIW 0x2001 -#define MASK_C_ADDIW 0xe003 -#define MATCH_C_LDSP 0x6002 -#define MASK_C_LDSP 0xe003 -#define MATCH_C_SDSP 0xe002 -#define MASK_C_SDSP 0xe003 -#define MATCH_C_ADDI4SPN 0x0 -#define MASK_C_ADDI4SPN 0xe003 -#define MATCH_C_FLD 0x2000 -#define MASK_C_FLD 0xe003 -#define MATCH_C_LW 0x4000 -#define MASK_C_LW 0xe003 -#define MATCH_C_FLW 0x6000 -#define MASK_C_FLW 0xe003 -#define MATCH_C_FSD 0xa000 -#define MASK_C_FSD 0xe003 -#define MATCH_C_SW 0xc000 -#define MASK_C_SW 0xe003 -#define MATCH_C_FSW 0xe000 -#define MASK_C_FSW 0xe003 -#define MATCH_C_ADDI 0x1 -#define MASK_C_ADDI 0xe003 -#define MATCH_C_JAL 0x2001 -#define MASK_C_JAL 0xe003 -#define MATCH_C_LI 0x4001 -#define MASK_C_LI 0xe003 -#define MATCH_C_LUI 0x6001 -#define MASK_C_LUI 0xe003 -#define MATCH_C_SRLI 0x8001 -#define MASK_C_SRLI 0xec03 -#define MATCH_C_SRAI 0x8401 -#define MASK_C_SRAI 0xec03 -#define MATCH_C_ANDI 0x8801 -#define MASK_C_ANDI 0xec03 -#define MATCH_C_SUB 0x8c01 -#define MASK_C_SUB 0xfc63 -#define MATCH_C_XOR 0x8c21 -#define MASK_C_XOR 0xfc63 -#define MATCH_C_OR 0x8c41 -#define MASK_C_OR 0xfc63 -#define MATCH_C_AND 0x8c61 -#define MASK_C_AND 0xfc63 -#define MATCH_C_SUBW 0x9c01 -#define MASK_C_SUBW 0xfc63 -#define MATCH_C_ADDW 0x9c21 -#define MASK_C_ADDW 0xfc63 -#define MATCH_C_J 0xa001 -#define 
MASK_C_J 0xe003 -#define MATCH_C_BEQZ 0xc001 -#define MASK_C_BEQZ 0xe003 -#define MATCH_C_BNEZ 0xe001 -#define MASK_C_BNEZ 0xe003 -#define MATCH_C_SLLI 0x2 -#define MASK_C_SLLI 0xe003 -#define MATCH_C_FLDSP 0x2002 -#define MASK_C_FLDSP 0xe003 -#define MATCH_C_LWSP 0x4002 -#define MASK_C_LWSP 0xe003 -#define MATCH_C_FLWSP 0x6002 -#define MASK_C_FLWSP 0xe003 -#define MATCH_C_MV 0x8002 -#define MASK_C_MV 0xf003 -#define MATCH_C_ADD 0x9002 -#define MASK_C_ADD 0xf003 -#define MATCH_C_FSDSP 0xa002 -#define MASK_C_FSDSP 0xe003 -#define MATCH_C_SWSP 0xc002 -#define MASK_C_SWSP 0xe003 -#define MATCH_C_FSWSP 0xe002 -#define MASK_C_FSWSP 0xe003 -#define MATCH_CUSTOM0 0xb -#define MASK_CUSTOM0 0x707f -#define MATCH_CUSTOM0_RS1 0x200b -#define MASK_CUSTOM0_RS1 0x707f -#define MATCH_CUSTOM0_RS1_RS2 0x300b -#define MASK_CUSTOM0_RS1_RS2 0x707f -#define MATCH_CUSTOM0_RD 0x400b -#define MASK_CUSTOM0_RD 0x707f -#define MATCH_CUSTOM0_RD_RS1 0x600b -#define MASK_CUSTOM0_RD_RS1 0x707f -#define MATCH_CUSTOM0_RD_RS1_RS2 0x700b -#define MASK_CUSTOM0_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM1 0x2b -#define MASK_CUSTOM1 0x707f -#define MATCH_CUSTOM1_RS1 0x202b -#define MASK_CUSTOM1_RS1 0x707f -#define MATCH_CUSTOM1_RS1_RS2 0x302b -#define MASK_CUSTOM1_RS1_RS2 0x707f -#define MATCH_CUSTOM1_RD 0x402b -#define MASK_CUSTOM1_RD 0x707f -#define MATCH_CUSTOM1_RD_RS1 0x602b -#define MASK_CUSTOM1_RD_RS1 0x707f -#define MATCH_CUSTOM1_RD_RS1_RS2 0x702b -#define MASK_CUSTOM1_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM2 0x5b -#define MASK_CUSTOM2 0x707f -#define MATCH_CUSTOM2_RS1 0x205b -#define MASK_CUSTOM2_RS1 0x707f -#define MATCH_CUSTOM2_RS1_RS2 0x305b -#define MASK_CUSTOM2_RS1_RS2 0x707f -#define MATCH_CUSTOM2_RD 0x405b -#define MASK_CUSTOM2_RD 0x707f -#define MATCH_CUSTOM2_RD_RS1 0x605b -#define MASK_CUSTOM2_RD_RS1 0x707f -#define MATCH_CUSTOM2_RD_RS1_RS2 0x705b -#define MASK_CUSTOM2_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM3 0x7b -#define MASK_CUSTOM3 0x707f -#define MATCH_CUSTOM3_RS1 0x207b -#define 
MASK_CUSTOM3_RS1 0x707f -#define MATCH_CUSTOM3_RS1_RS2 0x307b -#define MASK_CUSTOM3_RS1_RS2 0x707f -#define MATCH_CUSTOM3_RD 0x407b -#define MASK_CUSTOM3_RD 0x707f -#define MATCH_CUSTOM3_RD_RS1 0x607b -#define MASK_CUSTOM3_RD_RS1 0x707f -#define MATCH_CUSTOM3_RD_RS1_RS2 0x707b -#define MASK_CUSTOM3_RD_RS1_RS2 0x707f -#define CSR_FFLAGS 0x1 -#define CSR_FRM 0x2 -#define CSR_FCSR 0x3 -#define CSR_CYCLE 0xc00 -#define CSR_TIME 0xc01 -#define CSR_INSTRET 0xc02 -#define CSR_HPMCOUNTER3 0xc03 -#define CSR_HPMCOUNTER4 0xc04 -#define CSR_HPMCOUNTER5 0xc05 -#define CSR_HPMCOUNTER6 0xc06 -#define CSR_HPMCOUNTER7 0xc07 -#define CSR_HPMCOUNTER8 0xc08 -#define CSR_HPMCOUNTER9 0xc09 -#define CSR_HPMCOUNTER10 0xc0a -#define CSR_HPMCOUNTER11 0xc0b -#define CSR_HPMCOUNTER12 0xc0c -#define CSR_HPMCOUNTER13 0xc0d -#define CSR_HPMCOUNTER14 0xc0e -#define CSR_HPMCOUNTER15 0xc0f -#define CSR_HPMCOUNTER16 0xc10 -#define CSR_HPMCOUNTER17 0xc11 -#define CSR_HPMCOUNTER18 0xc12 -#define CSR_HPMCOUNTER19 0xc13 -#define CSR_HPMCOUNTER20 0xc14 -#define CSR_HPMCOUNTER21 0xc15 -#define CSR_HPMCOUNTER22 0xc16 -#define CSR_HPMCOUNTER23 0xc17 -#define CSR_HPMCOUNTER24 0xc18 -#define CSR_HPMCOUNTER25 0xc19 -#define CSR_HPMCOUNTER26 0xc1a -#define CSR_HPMCOUNTER27 0xc1b -#define CSR_HPMCOUNTER28 0xc1c -#define CSR_HPMCOUNTER29 0xc1d -#define CSR_HPMCOUNTER30 0xc1e -#define CSR_HPMCOUNTER31 0xc1f -#define CSR_SSTATUS 0x100 -#define CSR_SIE 0x104 -#define CSR_STVEC 0x105 -#define CSR_SCOUNTEREN 0x106 -#define CSR_SSCRATCH 0x140 -#define CSR_SEPC 0x141 -#define CSR_SCAUSE 0x142 -#define CSR_STVAL 0x143 -#define CSR_SIP 0x144 -#define CSR_SATP 0x180 -#define CSR_MSTATUS 0x300 -#define CSR_MISA 0x301 -#define CSR_MEDELEG 0x302 -#define CSR_MIDELEG 0x303 -#define CSR_MIE 0x304 -#define CSR_MTVEC 0x305 -#define CSR_MCOUNTEREN 0x306 -#define CSR_MSCRATCH 0x340 -#define CSR_MEPC 0x341 -#define CSR_MCAUSE 0x342 -#define CSR_MTVAL 0x343 -#define CSR_MIP 0x344 -#define CSR_PMPCFG0 0x3a0 -#define CSR_PMPCFG1 0x3a1 
-#define CSR_PMPCFG2 0x3a2 -#define CSR_PMPCFG3 0x3a3 -#define CSR_PMPADDR0 0x3b0 -#define CSR_PMPADDR1 0x3b1 -#define CSR_PMPADDR2 0x3b2 -#define CSR_PMPADDR3 0x3b3 -#define CSR_PMPADDR4 0x3b4 -#define CSR_PMPADDR5 0x3b5 -#define CSR_PMPADDR6 0x3b6 -#define CSR_PMPADDR7 0x3b7 -#define CSR_PMPADDR8 0x3b8 -#define CSR_PMPADDR9 0x3b9 -#define CSR_PMPADDR10 0x3ba -#define CSR_PMPADDR11 0x3bb -#define CSR_PMPADDR12 0x3bc -#define CSR_PMPADDR13 0x3bd -#define CSR_PMPADDR14 0x3be -#define CSR_PMPADDR15 0x3bf -#define CSR_TSELECT 0x7a0 -#define CSR_TDATA1 0x7a1 -#define CSR_TDATA2 0x7a2 -#define CSR_TDATA3 0x7a3 -#define CSR_DCSR 0x7b0 -#define CSR_DPC 0x7b1 -#define CSR_DSCRATCH 0x7b2 -#define CSR_MCYCLE 0xb00 -#define CSR_MINSTRET 0xb02 -#define CSR_MHPMCOUNTER3 0xb03 -#define CSR_MHPMCOUNTER4 0xb04 -#define CSR_MHPMCOUNTER5 0xb05 -#define CSR_MHPMCOUNTER6 0xb06 -#define CSR_MHPMCOUNTER7 0xb07 -#define CSR_MHPMCOUNTER8 0xb08 -#define CSR_MHPMCOUNTER9 0xb09 -#define CSR_MHPMCOUNTER10 0xb0a -#define CSR_MHPMCOUNTER11 0xb0b -#define CSR_MHPMCOUNTER12 0xb0c -#define CSR_MHPMCOUNTER13 0xb0d -#define CSR_MHPMCOUNTER14 0xb0e -#define CSR_MHPMCOUNTER15 0xb0f -#define CSR_MHPMCOUNTER16 0xb10 -#define CSR_MHPMCOUNTER17 0xb11 -#define CSR_MHPMCOUNTER18 0xb12 -#define CSR_MHPMCOUNTER19 0xb13 -#define CSR_MHPMCOUNTER20 0xb14 -#define CSR_MHPMCOUNTER21 0xb15 -#define CSR_MHPMCOUNTER22 0xb16 -#define CSR_MHPMCOUNTER23 0xb17 -#define CSR_MHPMCOUNTER24 0xb18 -#define CSR_MHPMCOUNTER25 0xb19 -#define CSR_MHPMCOUNTER26 0xb1a -#define CSR_MHPMCOUNTER27 0xb1b -#define CSR_MHPMCOUNTER28 0xb1c -#define CSR_MHPMCOUNTER29 0xb1d -#define CSR_MHPMCOUNTER30 0xb1e -#define CSR_MHPMCOUNTER31 0xb1f -#define CSR_MHPMEVENT3 0x323 -#define CSR_MHPMEVENT4 0x324 -#define CSR_MHPMEVENT5 0x325 -#define CSR_MHPMEVENT6 0x326 -#define CSR_MHPMEVENT7 0x327 -#define CSR_MHPMEVENT8 0x328 -#define CSR_MHPMEVENT9 0x329 -#define CSR_MHPMEVENT10 0x32a -#define CSR_MHPMEVENT11 0x32b -#define CSR_MHPMEVENT12 0x32c 
-#define CSR_MHPMEVENT13 0x32d -#define CSR_MHPMEVENT14 0x32e -#define CSR_MHPMEVENT15 0x32f -#define CSR_MHPMEVENT16 0x330 -#define CSR_MHPMEVENT17 0x331 -#define CSR_MHPMEVENT18 0x332 -#define CSR_MHPMEVENT19 0x333 -#define CSR_MHPMEVENT20 0x334 -#define CSR_MHPMEVENT21 0x335 -#define CSR_MHPMEVENT22 0x336 -#define CSR_MHPMEVENT23 0x337 -#define CSR_MHPMEVENT24 0x338 -#define CSR_MHPMEVENT25 0x339 -#define CSR_MHPMEVENT26 0x33a -#define CSR_MHPMEVENT27 0x33b -#define CSR_MHPMEVENT28 0x33c -#define CSR_MHPMEVENT29 0x33d -#define CSR_MHPMEVENT30 0x33e -#define CSR_MHPMEVENT31 0x33f -#define CSR_MVENDORID 0xf11 -#define CSR_MARCHID 0xf12 -#define CSR_MIMPID 0xf13 -#define CSR_MHARTID 0xf14 -#define CSR_CYCLEH 0xc80 -#define CSR_TIMEH 0xc81 -#define CSR_INSTRETH 0xc82 -#define CSR_HPMCOUNTER3H 0xc83 -#define CSR_HPMCOUNTER4H 0xc84 -#define CSR_HPMCOUNTER5H 0xc85 -#define CSR_HPMCOUNTER6H 0xc86 -#define CSR_HPMCOUNTER7H 0xc87 -#define CSR_HPMCOUNTER8H 0xc88 -#define CSR_HPMCOUNTER9H 0xc89 -#define CSR_HPMCOUNTER10H 0xc8a -#define CSR_HPMCOUNTER11H 0xc8b -#define CSR_HPMCOUNTER12H 0xc8c -#define CSR_HPMCOUNTER13H 0xc8d -#define CSR_HPMCOUNTER14H 0xc8e -#define CSR_HPMCOUNTER15H 0xc8f -#define CSR_HPMCOUNTER16H 0xc90 -#define CSR_HPMCOUNTER17H 0xc91 -#define CSR_HPMCOUNTER18H 0xc92 -#define CSR_HPMCOUNTER19H 0xc93 -#define CSR_HPMCOUNTER20H 0xc94 -#define CSR_HPMCOUNTER21H 0xc95 -#define CSR_HPMCOUNTER22H 0xc96 -#define CSR_HPMCOUNTER23H 0xc97 -#define CSR_HPMCOUNTER24H 0xc98 -#define CSR_HPMCOUNTER25H 0xc99 -#define CSR_HPMCOUNTER26H 0xc9a -#define CSR_HPMCOUNTER27H 0xc9b -#define CSR_HPMCOUNTER28H 0xc9c -#define CSR_HPMCOUNTER29H 0xc9d -#define CSR_HPMCOUNTER30H 0xc9e -#define CSR_HPMCOUNTER31H 0xc9f -#define CSR_MCYCLEH 0xb80 -#define CSR_MINSTRETH 0xb82 -#define CSR_MHPMCOUNTER3H 0xb83 -#define CSR_MHPMCOUNTER4H 0xb84 -#define CSR_MHPMCOUNTER5H 0xb85 -#define CSR_MHPMCOUNTER6H 0xb86 -#define CSR_MHPMCOUNTER7H 0xb87 -#define CSR_MHPMCOUNTER8H 0xb88 -#define 
CSR_MHPMCOUNTER9H 0xb89 -#define CSR_MHPMCOUNTER10H 0xb8a -#define CSR_MHPMCOUNTER11H 0xb8b -#define CSR_MHPMCOUNTER12H 0xb8c -#define CSR_MHPMCOUNTER13H 0xb8d -#define CSR_MHPMCOUNTER14H 0xb8e -#define CSR_MHPMCOUNTER15H 0xb8f -#define CSR_MHPMCOUNTER16H 0xb90 -#define CSR_MHPMCOUNTER17H 0xb91 -#define CSR_MHPMCOUNTER18H 0xb92 -#define CSR_MHPMCOUNTER19H 0xb93 -#define CSR_MHPMCOUNTER20H 0xb94 -#define CSR_MHPMCOUNTER21H 0xb95 -#define CSR_MHPMCOUNTER22H 0xb96 -#define CSR_MHPMCOUNTER23H 0xb97 -#define CSR_MHPMCOUNTER24H 0xb98 -#define CSR_MHPMCOUNTER25H 0xb99 -#define CSR_MHPMCOUNTER26H 0xb9a -#define CSR_MHPMCOUNTER27H 0xb9b -#define CSR_MHPMCOUNTER28H 0xb9c -#define CSR_MHPMCOUNTER29H 0xb9d -#define CSR_MHPMCOUNTER30H 0xb9e -#define CSR_MHPMCOUNTER31H 0xb9f -#define CAUSE_MISALIGNED_FETCH 0x0 -#define CAUSE_FETCH_ACCESS 0x1 -#define CAUSE_ILLEGAL_INSTRUCTION 0x2 -#define CAUSE_BREAKPOINT 0x3 -#define CAUSE_MISALIGNED_LOAD 0x4 -#define CAUSE_LOAD_ACCESS 0x5 -#define CAUSE_MISALIGNED_STORE 0x6 -#define CAUSE_STORE_ACCESS 0x7 -#define CAUSE_USER_ECALL 0x8 -#define CAUSE_SUPERVISOR_ECALL 0x9 -#define CAUSE_HYPERVISOR_ECALL 0xa -#define CAUSE_MACHINE_ECALL 0xb -#define CAUSE_FETCH_PAGE_FAULT 0xc -#define CAUSE_LOAD_PAGE_FAULT 0xd -#define CAUSE_STORE_PAGE_FAULT 0xf -#endif -#ifdef DECLARE_INSN -DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) -DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) -DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) -DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) -DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) -DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) -DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) -DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) -DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) -DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) -DECLARE_INSN(addi, MATCH_ADDI, MASK_ADDI) -DECLARE_INSN(slli, MATCH_SLLI, MASK_SLLI) -DECLARE_INSN(slti, MATCH_SLTI, MASK_SLTI) -DECLARE_INSN(sltiu, MATCH_SLTIU, MASK_SLTIU) -DECLARE_INSN(xori, MATCH_XORI, MASK_XORI) -DECLARE_INSN(srli, MATCH_SRLI, MASK_SRLI) 
-DECLARE_INSN(srai, MATCH_SRAI, MASK_SRAI) -DECLARE_INSN(ori, MATCH_ORI, MASK_ORI) -DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) -DECLARE_INSN(add, MATCH_ADD, MASK_ADD) -DECLARE_INSN(sub, MATCH_SUB, MASK_SUB) -DECLARE_INSN(sll, MATCH_SLL, MASK_SLL) -DECLARE_INSN(slt, MATCH_SLT, MASK_SLT) -DECLARE_INSN(sltu, MATCH_SLTU, MASK_SLTU) -DECLARE_INSN(xor, MATCH_XOR, MASK_XOR) -DECLARE_INSN(srl, MATCH_SRL, MASK_SRL) -DECLARE_INSN(sra, MATCH_SRA, MASK_SRA) -DECLARE_INSN(or, MATCH_OR, MASK_OR) -DECLARE_INSN(and, MATCH_AND, MASK_AND) -DECLARE_INSN(addiw, MATCH_ADDIW, MASK_ADDIW) -DECLARE_INSN(slliw, MATCH_SLLIW, MASK_SLLIW) -DECLARE_INSN(srliw, MATCH_SRLIW, MASK_SRLIW) -DECLARE_INSN(sraiw, MATCH_SRAIW, MASK_SRAIW) -DECLARE_INSN(addw, MATCH_ADDW, MASK_ADDW) -DECLARE_INSN(subw, MATCH_SUBW, MASK_SUBW) -DECLARE_INSN(sllw, MATCH_SLLW, MASK_SLLW) -DECLARE_INSN(srlw, MATCH_SRLW, MASK_SRLW) -DECLARE_INSN(sraw, MATCH_SRAW, MASK_SRAW) -DECLARE_INSN(lb, MATCH_LB, MASK_LB) -DECLARE_INSN(lh, MATCH_LH, MASK_LH) -DECLARE_INSN(lw, MATCH_LW, MASK_LW) -DECLARE_INSN(ld, MATCH_LD, MASK_LD) -DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) -DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) -DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) -DECLARE_INSN(sb, MATCH_SB, MASK_SB) -DECLARE_INSN(sh, MATCH_SH, MASK_SH) -DECLARE_INSN(sw, MATCH_SW, MASK_SW) -DECLARE_INSN(sd, MATCH_SD, MASK_SD) -DECLARE_INSN(fence, MATCH_FENCE, MASK_FENCE) -DECLARE_INSN(fence_i, MATCH_FENCE_I, MASK_FENCE_I) -DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) -DECLARE_INSN(mulh, MATCH_MULH, MASK_MULH) -DECLARE_INSN(mulhsu, MATCH_MULHSU, MASK_MULHSU) -DECLARE_INSN(mulhu, MATCH_MULHU, MASK_MULHU) -DECLARE_INSN(div, MATCH_DIV, MASK_DIV) -DECLARE_INSN(divu, MATCH_DIVU, MASK_DIVU) -DECLARE_INSN(rem, MATCH_REM, MASK_REM) -DECLARE_INSN(remu, MATCH_REMU, MASK_REMU) -DECLARE_INSN(mulw, MATCH_MULW, MASK_MULW) -DECLARE_INSN(divw, MATCH_DIVW, MASK_DIVW) -DECLARE_INSN(divuw, MATCH_DIVUW, MASK_DIVUW) -DECLARE_INSN(remw, MATCH_REMW, MASK_REMW) -DECLARE_INSN(remuw, MATCH_REMUW, 
MASK_REMUW) -DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) -DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) -DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) -DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) -DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) -DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) -DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) -DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) -DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) -DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) -DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) -DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) -DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) -DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) -DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) -DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) -DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) -DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) -DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) -DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) -DECLARE_INSN(lr_d, MATCH_LR_D, MASK_LR_D) -DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) -DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL) -DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK) -DECLARE_INSN(uret, MATCH_URET, MASK_URET) -DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) -DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) -DECLARE_INSN(dret, MATCH_DRET, MASK_DRET) -DECLARE_INSN(sfence_vma, MATCH_SFENCE_VMA, MASK_SFENCE_VMA) -DECLARE_INSN(wfi, MATCH_WFI, MASK_WFI) -DECLARE_INSN(csrrw, MATCH_CSRRW, MASK_CSRRW) -DECLARE_INSN(csrrs, MATCH_CSRRS, MASK_CSRRS) -DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) -DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) -DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) -DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) -DECLARE_INSN(fadd_s, MATCH_FADD_S, MASK_FADD_S) -DECLARE_INSN(fsub_s, MATCH_FSUB_S, MASK_FSUB_S) -DECLARE_INSN(fmul_s, MATCH_FMUL_S, MASK_FMUL_S) 
-DECLARE_INSN(fdiv_s, MATCH_FDIV_S, MASK_FDIV_S) -DECLARE_INSN(fsgnj_s, MATCH_FSGNJ_S, MASK_FSGNJ_S) -DECLARE_INSN(fsgnjn_s, MATCH_FSGNJN_S, MASK_FSGNJN_S) -DECLARE_INSN(fsgnjx_s, MATCH_FSGNJX_S, MASK_FSGNJX_S) -DECLARE_INSN(fmin_s, MATCH_FMIN_S, MASK_FMIN_S) -DECLARE_INSN(fmax_s, MATCH_FMAX_S, MASK_FMAX_S) -DECLARE_INSN(fsqrt_s, MATCH_FSQRT_S, MASK_FSQRT_S) -DECLARE_INSN(fadd_d, MATCH_FADD_D, MASK_FADD_D) -DECLARE_INSN(fsub_d, MATCH_FSUB_D, MASK_FSUB_D) -DECLARE_INSN(fmul_d, MATCH_FMUL_D, MASK_FMUL_D) -DECLARE_INSN(fdiv_d, MATCH_FDIV_D, MASK_FDIV_D) -DECLARE_INSN(fsgnj_d, MATCH_FSGNJ_D, MASK_FSGNJ_D) -DECLARE_INSN(fsgnjn_d, MATCH_FSGNJN_D, MASK_FSGNJN_D) -DECLARE_INSN(fsgnjx_d, MATCH_FSGNJX_D, MASK_FSGNJX_D) -DECLARE_INSN(fmin_d, MATCH_FMIN_D, MASK_FMIN_D) -DECLARE_INSN(fmax_d, MATCH_FMAX_D, MASK_FMAX_D) -DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) -DECLARE_INSN(fcvt_d_s, MATCH_FCVT_D_S, MASK_FCVT_D_S) -DECLARE_INSN(fsqrt_d, MATCH_FSQRT_D, MASK_FSQRT_D) -DECLARE_INSN(fadd_q, MATCH_FADD_Q, MASK_FADD_Q) -DECLARE_INSN(fsub_q, MATCH_FSUB_Q, MASK_FSUB_Q) -DECLARE_INSN(fmul_q, MATCH_FMUL_Q, MASK_FMUL_Q) -DECLARE_INSN(fdiv_q, MATCH_FDIV_Q, MASK_FDIV_Q) -DECLARE_INSN(fsgnj_q, MATCH_FSGNJ_Q, MASK_FSGNJ_Q) -DECLARE_INSN(fsgnjn_q, MATCH_FSGNJN_Q, MASK_FSGNJN_Q) -DECLARE_INSN(fsgnjx_q, MATCH_FSGNJX_Q, MASK_FSGNJX_Q) -DECLARE_INSN(fmin_q, MATCH_FMIN_Q, MASK_FMIN_Q) -DECLARE_INSN(fmax_q, MATCH_FMAX_Q, MASK_FMAX_Q) -DECLARE_INSN(fcvt_s_q, MATCH_FCVT_S_Q, MASK_FCVT_S_Q) -DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) -DECLARE_INSN(fcvt_d_q, MATCH_FCVT_D_Q, MASK_FCVT_D_Q) -DECLARE_INSN(fcvt_q_d, MATCH_FCVT_Q_D, MASK_FCVT_Q_D) -DECLARE_INSN(fsqrt_q, MATCH_FSQRT_Q, MASK_FSQRT_Q) -DECLARE_INSN(fle_s, MATCH_FLE_S, MASK_FLE_S) -DECLARE_INSN(flt_s, MATCH_FLT_S, MASK_FLT_S) -DECLARE_INSN(feq_s, MATCH_FEQ_S, MASK_FEQ_S) -DECLARE_INSN(fle_d, MATCH_FLE_D, MASK_FLE_D) -DECLARE_INSN(flt_d, MATCH_FLT_D, MASK_FLT_D) -DECLARE_INSN(feq_d, MATCH_FEQ_D, MASK_FEQ_D) 
-DECLARE_INSN(fle_q, MATCH_FLE_Q, MASK_FLE_Q) -DECLARE_INSN(flt_q, MATCH_FLT_Q, MASK_FLT_Q) -DECLARE_INSN(feq_q, MATCH_FEQ_Q, MASK_FEQ_Q) -DECLARE_INSN(fcvt_w_s, MATCH_FCVT_W_S, MASK_FCVT_W_S) -DECLARE_INSN(fcvt_wu_s, MATCH_FCVT_WU_S, MASK_FCVT_WU_S) -DECLARE_INSN(fcvt_l_s, MATCH_FCVT_L_S, MASK_FCVT_L_S) -DECLARE_INSN(fcvt_lu_s, MATCH_FCVT_LU_S, MASK_FCVT_LU_S) -DECLARE_INSN(fmv_x_w, MATCH_FMV_X_W, MASK_FMV_X_W) -DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) -DECLARE_INSN(fcvt_w_d, MATCH_FCVT_W_D, MASK_FCVT_W_D) -DECLARE_INSN(fcvt_wu_d, MATCH_FCVT_WU_D, MASK_FCVT_WU_D) -DECLARE_INSN(fcvt_l_d, MATCH_FCVT_L_D, MASK_FCVT_L_D) -DECLARE_INSN(fcvt_lu_d, MATCH_FCVT_LU_D, MASK_FCVT_LU_D) -DECLARE_INSN(fmv_x_d, MATCH_FMV_X_D, MASK_FMV_X_D) -DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) -DECLARE_INSN(fcvt_w_q, MATCH_FCVT_W_Q, MASK_FCVT_W_Q) -DECLARE_INSN(fcvt_wu_q, MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q) -DECLARE_INSN(fcvt_l_q, MATCH_FCVT_L_Q, MASK_FCVT_L_Q) -DECLARE_INSN(fcvt_lu_q, MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q) -DECLARE_INSN(fmv_x_q, MATCH_FMV_X_Q, MASK_FMV_X_Q) -DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) -DECLARE_INSN(fcvt_s_w, MATCH_FCVT_S_W, MASK_FCVT_S_W) -DECLARE_INSN(fcvt_s_wu, MATCH_FCVT_S_WU, MASK_FCVT_S_WU) -DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) -DECLARE_INSN(fcvt_s_lu, MATCH_FCVT_S_LU, MASK_FCVT_S_LU) -DECLARE_INSN(fmv_w_x, MATCH_FMV_W_X, MASK_FMV_W_X) -DECLARE_INSN(fcvt_d_w, MATCH_FCVT_D_W, MASK_FCVT_D_W) -DECLARE_INSN(fcvt_d_wu, MATCH_FCVT_D_WU, MASK_FCVT_D_WU) -DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) -DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) -DECLARE_INSN(fmv_d_x, MATCH_FMV_D_X, MASK_FMV_D_X) -DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) -DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) -DECLARE_INSN(fcvt_q_l, MATCH_FCVT_Q_L, MASK_FCVT_Q_L) -DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) -DECLARE_INSN(fmv_q_x, MATCH_FMV_Q_X, MASK_FMV_Q_X) -DECLARE_INSN(flw, 
MATCH_FLW, MASK_FLW) -DECLARE_INSN(fld, MATCH_FLD, MASK_FLD) -DECLARE_INSN(flq, MATCH_FLQ, MASK_FLQ) -DECLARE_INSN(fsw, MATCH_FSW, MASK_FSW) -DECLARE_INSN(fsd, MATCH_FSD, MASK_FSD) -DECLARE_INSN(fsq, MATCH_FSQ, MASK_FSQ) -DECLARE_INSN(fmadd_s, MATCH_FMADD_S, MASK_FMADD_S) -DECLARE_INSN(fmsub_s, MATCH_FMSUB_S, MASK_FMSUB_S) -DECLARE_INSN(fnmsub_s, MATCH_FNMSUB_S, MASK_FNMSUB_S) -DECLARE_INSN(fnmadd_s, MATCH_FNMADD_S, MASK_FNMADD_S) -DECLARE_INSN(fmadd_d, MATCH_FMADD_D, MASK_FMADD_D) -DECLARE_INSN(fmsub_d, MATCH_FMSUB_D, MASK_FMSUB_D) -DECLARE_INSN(fnmsub_d, MATCH_FNMSUB_D, MASK_FNMSUB_D) -DECLARE_INSN(fnmadd_d, MATCH_FNMADD_D, MASK_FNMADD_D) -DECLARE_INSN(fmadd_q, MATCH_FMADD_Q, MASK_FMADD_Q) -DECLARE_INSN(fmsub_q, MATCH_FMSUB_Q, MASK_FMSUB_Q) -DECLARE_INSN(fnmsub_q, MATCH_FNMSUB_Q, MASK_FNMSUB_Q) -DECLARE_INSN(fnmadd_q, MATCH_FNMADD_Q, MASK_FNMADD_Q) -DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) -DECLARE_INSN(c_addi16sp, MATCH_C_ADDI16SP, MASK_C_ADDI16SP) -DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) -DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) -DECLARE_INSN(c_ebreak, MATCH_C_EBREAK, MASK_C_EBREAK) -DECLARE_INSN(c_ld, MATCH_C_LD, MASK_C_LD) -DECLARE_INSN(c_sd, MATCH_C_SD, MASK_C_SD) -DECLARE_INSN(c_addiw, MATCH_C_ADDIW, MASK_C_ADDIW) -DECLARE_INSN(c_ldsp, MATCH_C_LDSP, MASK_C_LDSP) -DECLARE_INSN(c_sdsp, MATCH_C_SDSP, MASK_C_SDSP) -DECLARE_INSN(c_addi4spn, MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN) -DECLARE_INSN(c_fld, MATCH_C_FLD, MASK_C_FLD) -DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) -DECLARE_INSN(c_flw, MATCH_C_FLW, MASK_C_FLW) -DECLARE_INSN(c_fsd, MATCH_C_FSD, MASK_C_FSD) -DECLARE_INSN(c_sw, MATCH_C_SW, MASK_C_SW) -DECLARE_INSN(c_fsw, MATCH_C_FSW, MASK_C_FSW) -DECLARE_INSN(c_addi, MATCH_C_ADDI, MASK_C_ADDI) -DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) -DECLARE_INSN(c_li, MATCH_C_LI, MASK_C_LI) -DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) -DECLARE_INSN(c_srli, MATCH_C_SRLI, MASK_C_SRLI) -DECLARE_INSN(c_srai, MATCH_C_SRAI, MASK_C_SRAI) -DECLARE_INSN(c_andi, 
MATCH_C_ANDI, MASK_C_ANDI) -DECLARE_INSN(c_sub, MATCH_C_SUB, MASK_C_SUB) -DECLARE_INSN(c_xor, MATCH_C_XOR, MASK_C_XOR) -DECLARE_INSN(c_or, MATCH_C_OR, MASK_C_OR) -DECLARE_INSN(c_and, MATCH_C_AND, MASK_C_AND) -DECLARE_INSN(c_subw, MATCH_C_SUBW, MASK_C_SUBW) -DECLARE_INSN(c_addw, MATCH_C_ADDW, MASK_C_ADDW) -DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) -DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) -DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) -DECLARE_INSN(c_slli, MATCH_C_SLLI, MASK_C_SLLI) -DECLARE_INSN(c_fldsp, MATCH_C_FLDSP, MASK_C_FLDSP) -DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) -DECLARE_INSN(c_flwsp, MATCH_C_FLWSP, MASK_C_FLWSP) -DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) -DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) -DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) -DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) -DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) -DECLARE_INSN(custom0, MATCH_CUSTOM0, MASK_CUSTOM0) -DECLARE_INSN(custom0_rs1, MATCH_CUSTOM0_RS1, MASK_CUSTOM0_RS1) -DECLARE_INSN(custom0_rs1_rs2, MATCH_CUSTOM0_RS1_RS2, MASK_CUSTOM0_RS1_RS2) -DECLARE_INSN(custom0_rd, MATCH_CUSTOM0_RD, MASK_CUSTOM0_RD) -DECLARE_INSN(custom0_rd_rs1, MATCH_CUSTOM0_RD_RS1, MASK_CUSTOM0_RD_RS1) -DECLARE_INSN(custom0_rd_rs1_rs2, MATCH_CUSTOM0_RD_RS1_RS2, MASK_CUSTOM0_RD_RS1_RS2) -DECLARE_INSN(custom1, MATCH_CUSTOM1, MASK_CUSTOM1) -DECLARE_INSN(custom1_rs1, MATCH_CUSTOM1_RS1, MASK_CUSTOM1_RS1) -DECLARE_INSN(custom1_rs1_rs2, MATCH_CUSTOM1_RS1_RS2, MASK_CUSTOM1_RS1_RS2) -DECLARE_INSN(custom1_rd, MATCH_CUSTOM1_RD, MASK_CUSTOM1_RD) -DECLARE_INSN(custom1_rd_rs1, MATCH_CUSTOM1_RD_RS1, MASK_CUSTOM1_RD_RS1) -DECLARE_INSN(custom1_rd_rs1_rs2, MATCH_CUSTOM1_RD_RS1_RS2, MASK_CUSTOM1_RD_RS1_RS2) -DECLARE_INSN(custom2, MATCH_CUSTOM2, MASK_CUSTOM2) -DECLARE_INSN(custom2_rs1, MATCH_CUSTOM2_RS1, MASK_CUSTOM2_RS1) -DECLARE_INSN(custom2_rs1_rs2, MATCH_CUSTOM2_RS1_RS2, MASK_CUSTOM2_RS1_RS2) -DECLARE_INSN(custom2_rd, MATCH_CUSTOM2_RD, MASK_CUSTOM2_RD) -DECLARE_INSN(custom2_rd_rs1, 
MATCH_CUSTOM2_RD_RS1, MASK_CUSTOM2_RD_RS1) -DECLARE_INSN(custom2_rd_rs1_rs2, MATCH_CUSTOM2_RD_RS1_RS2, MASK_CUSTOM2_RD_RS1_RS2) -DECLARE_INSN(custom3, MATCH_CUSTOM3, MASK_CUSTOM3) -DECLARE_INSN(custom3_rs1, MATCH_CUSTOM3_RS1, MASK_CUSTOM3_RS1) -DECLARE_INSN(custom3_rs1_rs2, MATCH_CUSTOM3_RS1_RS2, MASK_CUSTOM3_RS1_RS2) -DECLARE_INSN(custom3_rd, MATCH_CUSTOM3_RD, MASK_CUSTOM3_RD) -DECLARE_INSN(custom3_rd_rs1, MATCH_CUSTOM3_RD_RS1, MASK_CUSTOM3_RD_RS1) -DECLARE_INSN(custom3_rd_rs1_rs2, MATCH_CUSTOM3_RD_RS1_RS2, MASK_CUSTOM3_RD_RS1_RS2) -#endif -#ifdef DECLARE_CSR -DECLARE_CSR(fflags, CSR_FFLAGS) -DECLARE_CSR(frm, CSR_FRM) -DECLARE_CSR(fcsr, CSR_FCSR) -DECLARE_CSR(cycle, CSR_CYCLE) -DECLARE_CSR(time, CSR_TIME) -DECLARE_CSR(instret, CSR_INSTRET) -DECLARE_CSR(hpmcounter3, CSR_HPMCOUNTER3) -DECLARE_CSR(hpmcounter4, CSR_HPMCOUNTER4) -DECLARE_CSR(hpmcounter5, CSR_HPMCOUNTER5) -DECLARE_CSR(hpmcounter6, CSR_HPMCOUNTER6) -DECLARE_CSR(hpmcounter7, CSR_HPMCOUNTER7) -DECLARE_CSR(hpmcounter8, CSR_HPMCOUNTER8) -DECLARE_CSR(hpmcounter9, CSR_HPMCOUNTER9) -DECLARE_CSR(hpmcounter10, CSR_HPMCOUNTER10) -DECLARE_CSR(hpmcounter11, CSR_HPMCOUNTER11) -DECLARE_CSR(hpmcounter12, CSR_HPMCOUNTER12) -DECLARE_CSR(hpmcounter13, CSR_HPMCOUNTER13) -DECLARE_CSR(hpmcounter14, CSR_HPMCOUNTER14) -DECLARE_CSR(hpmcounter15, CSR_HPMCOUNTER15) -DECLARE_CSR(hpmcounter16, CSR_HPMCOUNTER16) -DECLARE_CSR(hpmcounter17, CSR_HPMCOUNTER17) -DECLARE_CSR(hpmcounter18, CSR_HPMCOUNTER18) -DECLARE_CSR(hpmcounter19, CSR_HPMCOUNTER19) -DECLARE_CSR(hpmcounter20, CSR_HPMCOUNTER20) -DECLARE_CSR(hpmcounter21, CSR_HPMCOUNTER21) -DECLARE_CSR(hpmcounter22, CSR_HPMCOUNTER22) -DECLARE_CSR(hpmcounter23, CSR_HPMCOUNTER23) -DECLARE_CSR(hpmcounter24, CSR_HPMCOUNTER24) -DECLARE_CSR(hpmcounter25, CSR_HPMCOUNTER25) -DECLARE_CSR(hpmcounter26, CSR_HPMCOUNTER26) -DECLARE_CSR(hpmcounter27, CSR_HPMCOUNTER27) -DECLARE_CSR(hpmcounter28, CSR_HPMCOUNTER28) -DECLARE_CSR(hpmcounter29, CSR_HPMCOUNTER29) -DECLARE_CSR(hpmcounter30, CSR_HPMCOUNTER30) 
-DECLARE_CSR(hpmcounter31, CSR_HPMCOUNTER31) -DECLARE_CSR(sstatus, CSR_SSTATUS) -DECLARE_CSR(sie, CSR_SIE) -DECLARE_CSR(stvec, CSR_STVEC) -DECLARE_CSR(scounteren, CSR_SCOUNTEREN) -DECLARE_CSR(sscratch, CSR_SSCRATCH) -DECLARE_CSR(sepc, CSR_SEPC) -DECLARE_CSR(scause, CSR_SCAUSE) -DECLARE_CSR(stval, CSR_STVAL) -DECLARE_CSR(sip, CSR_SIP) -DECLARE_CSR(satp, CSR_SATP) -DECLARE_CSR(mstatus, CSR_MSTATUS) -DECLARE_CSR(misa, CSR_MISA) -DECLARE_CSR(medeleg, CSR_MEDELEG) -DECLARE_CSR(mideleg, CSR_MIDELEG) -DECLARE_CSR(mie, CSR_MIE) -DECLARE_CSR(mtvec, CSR_MTVEC) -DECLARE_CSR(mcounteren, CSR_MCOUNTEREN) -DECLARE_CSR(mscratch, CSR_MSCRATCH) -DECLARE_CSR(mepc, CSR_MEPC) -DECLARE_CSR(mcause, CSR_MCAUSE) -DECLARE_CSR(mtval, CSR_MTVAL) -DECLARE_CSR(mip, CSR_MIP) -DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) -DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) -DECLARE_CSR(pmpcfg2, CSR_PMPCFG2) -DECLARE_CSR(pmpcfg3, CSR_PMPCFG3) -DECLARE_CSR(pmpaddr0, CSR_PMPADDR0) -DECLARE_CSR(pmpaddr1, CSR_PMPADDR1) -DECLARE_CSR(pmpaddr2, CSR_PMPADDR2) -DECLARE_CSR(pmpaddr3, CSR_PMPADDR3) -DECLARE_CSR(pmpaddr4, CSR_PMPADDR4) -DECLARE_CSR(pmpaddr5, CSR_PMPADDR5) -DECLARE_CSR(pmpaddr6, CSR_PMPADDR6) -DECLARE_CSR(pmpaddr7, CSR_PMPADDR7) -DECLARE_CSR(pmpaddr8, CSR_PMPADDR8) -DECLARE_CSR(pmpaddr9, CSR_PMPADDR9) -DECLARE_CSR(pmpaddr10, CSR_PMPADDR10) -DECLARE_CSR(pmpaddr11, CSR_PMPADDR11) -DECLARE_CSR(pmpaddr12, CSR_PMPADDR12) -DECLARE_CSR(pmpaddr13, CSR_PMPADDR13) -DECLARE_CSR(pmpaddr14, CSR_PMPADDR14) -DECLARE_CSR(pmpaddr15, CSR_PMPADDR15) -DECLARE_CSR(tselect, CSR_TSELECT) -DECLARE_CSR(tdata1, CSR_TDATA1) -DECLARE_CSR(tdata2, CSR_TDATA2) -DECLARE_CSR(tdata3, CSR_TDATA3) -DECLARE_CSR(dcsr, CSR_DCSR) -DECLARE_CSR(dpc, CSR_DPC) -DECLARE_CSR(dscratch, CSR_DSCRATCH) -DECLARE_CSR(mcycle, CSR_MCYCLE) -DECLARE_CSR(minstret, CSR_MINSTRET) -DECLARE_CSR(mhpmcounter3, CSR_MHPMCOUNTER3) -DECLARE_CSR(mhpmcounter4, CSR_MHPMCOUNTER4) -DECLARE_CSR(mhpmcounter5, CSR_MHPMCOUNTER5) -DECLARE_CSR(mhpmcounter6, CSR_MHPMCOUNTER6) 
-DECLARE_CSR(mhpmcounter7, CSR_MHPMCOUNTER7) -DECLARE_CSR(mhpmcounter8, CSR_MHPMCOUNTER8) -DECLARE_CSR(mhpmcounter9, CSR_MHPMCOUNTER9) -DECLARE_CSR(mhpmcounter10, CSR_MHPMCOUNTER10) -DECLARE_CSR(mhpmcounter11, CSR_MHPMCOUNTER11) -DECLARE_CSR(mhpmcounter12, CSR_MHPMCOUNTER12) -DECLARE_CSR(mhpmcounter13, CSR_MHPMCOUNTER13) -DECLARE_CSR(mhpmcounter14, CSR_MHPMCOUNTER14) -DECLARE_CSR(mhpmcounter15, CSR_MHPMCOUNTER15) -DECLARE_CSR(mhpmcounter16, CSR_MHPMCOUNTER16) -DECLARE_CSR(mhpmcounter17, CSR_MHPMCOUNTER17) -DECLARE_CSR(mhpmcounter18, CSR_MHPMCOUNTER18) -DECLARE_CSR(mhpmcounter19, CSR_MHPMCOUNTER19) -DECLARE_CSR(mhpmcounter20, CSR_MHPMCOUNTER20) -DECLARE_CSR(mhpmcounter21, CSR_MHPMCOUNTER21) -DECLARE_CSR(mhpmcounter22, CSR_MHPMCOUNTER22) -DECLARE_CSR(mhpmcounter23, CSR_MHPMCOUNTER23) -DECLARE_CSR(mhpmcounter24, CSR_MHPMCOUNTER24) -DECLARE_CSR(mhpmcounter25, CSR_MHPMCOUNTER25) -DECLARE_CSR(mhpmcounter26, CSR_MHPMCOUNTER26) -DECLARE_CSR(mhpmcounter27, CSR_MHPMCOUNTER27) -DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) -DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) -DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) -DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) -DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) -DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) -DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) -DECLARE_CSR(mhpmevent6, CSR_MHPMEVENT6) -DECLARE_CSR(mhpmevent7, CSR_MHPMEVENT7) -DECLARE_CSR(mhpmevent8, CSR_MHPMEVENT8) -DECLARE_CSR(mhpmevent9, CSR_MHPMEVENT9) -DECLARE_CSR(mhpmevent10, CSR_MHPMEVENT10) -DECLARE_CSR(mhpmevent11, CSR_MHPMEVENT11) -DECLARE_CSR(mhpmevent12, CSR_MHPMEVENT12) -DECLARE_CSR(mhpmevent13, CSR_MHPMEVENT13) -DECLARE_CSR(mhpmevent14, CSR_MHPMEVENT14) -DECLARE_CSR(mhpmevent15, CSR_MHPMEVENT15) -DECLARE_CSR(mhpmevent16, CSR_MHPMEVENT16) -DECLARE_CSR(mhpmevent17, CSR_MHPMEVENT17) -DECLARE_CSR(mhpmevent18, CSR_MHPMEVENT18) -DECLARE_CSR(mhpmevent19, CSR_MHPMEVENT19) -DECLARE_CSR(mhpmevent20, CSR_MHPMEVENT20) -DECLARE_CSR(mhpmevent21, CSR_MHPMEVENT21) -DECLARE_CSR(mhpmevent22, 
CSR_MHPMEVENT22) -DECLARE_CSR(mhpmevent23, CSR_MHPMEVENT23) -DECLARE_CSR(mhpmevent24, CSR_MHPMEVENT24) -DECLARE_CSR(mhpmevent25, CSR_MHPMEVENT25) -DECLARE_CSR(mhpmevent26, CSR_MHPMEVENT26) -DECLARE_CSR(mhpmevent27, CSR_MHPMEVENT27) -DECLARE_CSR(mhpmevent28, CSR_MHPMEVENT28) -DECLARE_CSR(mhpmevent29, CSR_MHPMEVENT29) -DECLARE_CSR(mhpmevent30, CSR_MHPMEVENT30) -DECLARE_CSR(mhpmevent31, CSR_MHPMEVENT31) -DECLARE_CSR(mvendorid, CSR_MVENDORID) -DECLARE_CSR(marchid, CSR_MARCHID) -DECLARE_CSR(mimpid, CSR_MIMPID) -DECLARE_CSR(mhartid, CSR_MHARTID) -DECLARE_CSR(cycleh, CSR_CYCLEH) -DECLARE_CSR(timeh, CSR_TIMEH) -DECLARE_CSR(instreth, CSR_INSTRETH) -DECLARE_CSR(hpmcounter3h, CSR_HPMCOUNTER3H) -DECLARE_CSR(hpmcounter4h, CSR_HPMCOUNTER4H) -DECLARE_CSR(hpmcounter5h, CSR_HPMCOUNTER5H) -DECLARE_CSR(hpmcounter6h, CSR_HPMCOUNTER6H) -DECLARE_CSR(hpmcounter7h, CSR_HPMCOUNTER7H) -DECLARE_CSR(hpmcounter8h, CSR_HPMCOUNTER8H) -DECLARE_CSR(hpmcounter9h, CSR_HPMCOUNTER9H) -DECLARE_CSR(hpmcounter10h, CSR_HPMCOUNTER10H) -DECLARE_CSR(hpmcounter11h, CSR_HPMCOUNTER11H) -DECLARE_CSR(hpmcounter12h, CSR_HPMCOUNTER12H) -DECLARE_CSR(hpmcounter13h, CSR_HPMCOUNTER13H) -DECLARE_CSR(hpmcounter14h, CSR_HPMCOUNTER14H) -DECLARE_CSR(hpmcounter15h, CSR_HPMCOUNTER15H) -DECLARE_CSR(hpmcounter16h, CSR_HPMCOUNTER16H) -DECLARE_CSR(hpmcounter17h, CSR_HPMCOUNTER17H) -DECLARE_CSR(hpmcounter18h, CSR_HPMCOUNTER18H) -DECLARE_CSR(hpmcounter19h, CSR_HPMCOUNTER19H) -DECLARE_CSR(hpmcounter20h, CSR_HPMCOUNTER20H) -DECLARE_CSR(hpmcounter21h, CSR_HPMCOUNTER21H) -DECLARE_CSR(hpmcounter22h, CSR_HPMCOUNTER22H) -DECLARE_CSR(hpmcounter23h, CSR_HPMCOUNTER23H) -DECLARE_CSR(hpmcounter24h, CSR_HPMCOUNTER24H) -DECLARE_CSR(hpmcounter25h, CSR_HPMCOUNTER25H) -DECLARE_CSR(hpmcounter26h, CSR_HPMCOUNTER26H) -DECLARE_CSR(hpmcounter27h, CSR_HPMCOUNTER27H) -DECLARE_CSR(hpmcounter28h, CSR_HPMCOUNTER28H) -DECLARE_CSR(hpmcounter29h, CSR_HPMCOUNTER29H) -DECLARE_CSR(hpmcounter30h, CSR_HPMCOUNTER30H) -DECLARE_CSR(hpmcounter31h, CSR_HPMCOUNTER31H) 
-DECLARE_CSR(mcycleh, CSR_MCYCLEH) -DECLARE_CSR(minstreth, CSR_MINSTRETH) -DECLARE_CSR(mhpmcounter3h, CSR_MHPMCOUNTER3H) -DECLARE_CSR(mhpmcounter4h, CSR_MHPMCOUNTER4H) -DECLARE_CSR(mhpmcounter5h, CSR_MHPMCOUNTER5H) -DECLARE_CSR(mhpmcounter6h, CSR_MHPMCOUNTER6H) -DECLARE_CSR(mhpmcounter7h, CSR_MHPMCOUNTER7H) -DECLARE_CSR(mhpmcounter8h, CSR_MHPMCOUNTER8H) -DECLARE_CSR(mhpmcounter9h, CSR_MHPMCOUNTER9H) -DECLARE_CSR(mhpmcounter10h, CSR_MHPMCOUNTER10H) -DECLARE_CSR(mhpmcounter11h, CSR_MHPMCOUNTER11H) -DECLARE_CSR(mhpmcounter12h, CSR_MHPMCOUNTER12H) -DECLARE_CSR(mhpmcounter13h, CSR_MHPMCOUNTER13H) -DECLARE_CSR(mhpmcounter14h, CSR_MHPMCOUNTER14H) -DECLARE_CSR(mhpmcounter15h, CSR_MHPMCOUNTER15H) -DECLARE_CSR(mhpmcounter16h, CSR_MHPMCOUNTER16H) -DECLARE_CSR(mhpmcounter17h, CSR_MHPMCOUNTER17H) -DECLARE_CSR(mhpmcounter18h, CSR_MHPMCOUNTER18H) -DECLARE_CSR(mhpmcounter19h, CSR_MHPMCOUNTER19H) -DECLARE_CSR(mhpmcounter20h, CSR_MHPMCOUNTER20H) -DECLARE_CSR(mhpmcounter21h, CSR_MHPMCOUNTER21H) -DECLARE_CSR(mhpmcounter22h, CSR_MHPMCOUNTER22H) -DECLARE_CSR(mhpmcounter23h, CSR_MHPMCOUNTER23H) -DECLARE_CSR(mhpmcounter24h, CSR_MHPMCOUNTER24H) -DECLARE_CSR(mhpmcounter25h, CSR_MHPMCOUNTER25H) -DECLARE_CSR(mhpmcounter26h, CSR_MHPMCOUNTER26H) -DECLARE_CSR(mhpmcounter27h, CSR_MHPMCOUNTER27H) -DECLARE_CSR(mhpmcounter28h, CSR_MHPMCOUNTER28H) -DECLARE_CSR(mhpmcounter29h, CSR_MHPMCOUNTER29H) -DECLARE_CSR(mhpmcounter30h, CSR_MHPMCOUNTER30H) -DECLARE_CSR(mhpmcounter31h, CSR_MHPMCOUNTER31H) -#endif -#ifdef DECLARE_CAUSE -DECLARE_CAUSE("misaligned fetch", CAUSE_MISALIGNED_FETCH) -DECLARE_CAUSE("fetch access", CAUSE_FETCH_ACCESS) -DECLARE_CAUSE("illegal instruction", CAUSE_ILLEGAL_INSTRUCTION) -DECLARE_CAUSE("breakpoint", CAUSE_BREAKPOINT) -DECLARE_CAUSE("misaligned load", CAUSE_MISALIGNED_LOAD) -DECLARE_CAUSE("load access", CAUSE_LOAD_ACCESS) -DECLARE_CAUSE("misaligned store", CAUSE_MISALIGNED_STORE) -DECLARE_CAUSE("store access", CAUSE_STORE_ACCESS) -DECLARE_CAUSE("user_ecall", CAUSE_USER_ECALL) 
-DECLARE_CAUSE("supervisor_ecall", CAUSE_SUPERVISOR_ECALL) -DECLARE_CAUSE("hypervisor_ecall", CAUSE_HYPERVISOR_ECALL) -DECLARE_CAUSE("machine_ecall", CAUSE_MACHINE_ECALL) -DECLARE_CAUSE("fetch page fault", CAUSE_FETCH_PAGE_FAULT) -DECLARE_CAUSE("load page fault", CAUSE_LOAD_PAGE_FAULT) -DECLARE_CAUSE("store page fault", CAUSE_STORE_PAGE_FAULT) -#endif diff --git a/riscv/encoding.h b/riscv/encoding.h new file mode 120000 index 0000000000..1075f15317 --- /dev/null +++ b/riscv/encoding.h @@ -0,0 +1 @@ +../../../software/runtime/encoding.h \ No newline at end of file diff --git a/riscv/execute.cc b/riscv/execute.cc index e639e90462..84c6d91071 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -2,65 +2,164 @@ #include "processor.h" #include "mmu.h" +#include "disasm.h" #include +#ifdef RISCV_ENABLE_COMMITLOG +static void commit_log_reset(processor_t* p) +{ + p->get_state()->log_reg_write.clear(); + p->get_state()->log_mem_read.clear(); + p->get_state()->log_mem_write.clear(); +} static void commit_log_stash_privilege(processor_t* p) { -#ifdef RISCV_ENABLE_COMMITLOG state_t* state = p->get_state(); state->last_inst_priv = state->prv; state->last_inst_xlen = p->get_xlen(); state->last_inst_flen = p->get_flen(); -#endif } -static void commit_log_print_value(int width, uint64_t hi, uint64_t lo) +static void commit_log_print_value(FILE *log_file, int width, const void *data) { + assert(log_file); + switch (width) { + case 8: + fprintf(log_file, "0x%01" PRIx8, *(const uint8_t *)data); + break; case 16: - fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo); + fprintf(log_file, "0x%04" PRIx16, *(const uint16_t *)data); break; case 32: - fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo); + fprintf(log_file, "0x%08" PRIx32, *(const uint32_t *)data); break; case 64: - fprintf(stderr, "0x%016" PRIx64, lo); - break; - case 128: - fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo); + fprintf(log_file, "0x%016" PRIx64, *(const uint64_t *)data); break; default: - abort(); + // max 
length of vector + if (((width - 1) & width) == 0) { + const uint64_t *arr = (const uint64_t *)data; + + fprintf(log_file, "0x"); + for (int idx = width / 64 - 1; idx >= 0; --idx) { + fprintf(log_file, "%016" PRIx64, arr[idx]); + } + } else { + abort(); + } + break; } } -static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn) +static void commit_log_print_value(FILE *log_file, int width, uint64_t val) { -#ifdef RISCV_ENABLE_COMMITLOG - auto& reg = state->log_reg_write; - int priv = state->last_inst_priv; - int xlen = state->last_inst_xlen; - int flen = state->last_inst_flen; - - fprintf(stderr, "%1d ", priv); - commit_log_print_value(xlen, 0, pc); - fprintf(stderr, " ("); - commit_log_print_value(insn.length() * 8, 0, insn.bits()); - - if (reg.addr) { - bool fp = reg.addr & 1; - int rd = reg.addr >> 1; - int size = fp ? flen : xlen; - fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd); - commit_log_print_value(size, reg.data.v[1], reg.data.v[0]); - fprintf(stderr, "\n"); - } else { - fprintf(stderr, ")\n"); + commit_log_print_value(log_file, width, &val); +} + +const char* processor_t::get_symbol(uint64_t addr) +{ + return sim->get_symbol(addr); +} + +static void commit_log_print_insn(processor_t *p, reg_t pc, insn_t insn) +{ + FILE *log_file = p->get_log_file(); + + auto& reg = p->get_state()->log_reg_write; + auto& load = p->get_state()->log_mem_read; + auto& store = p->get_state()->log_mem_write; + int priv = p->get_state()->last_inst_priv; + int xlen = p->get_state()->last_inst_xlen; + int flen = p->get_state()->last_inst_flen; + + // print core id on all lines so it is easy to grep + uint64_t id = p->get_csr(CSR_MHARTID); + fprintf(log_file, "core%4" PRId64 ": ", id); + + fprintf(log_file, "%1d ", priv); + commit_log_print_value(log_file, xlen, pc); + fprintf(log_file, " ("); + commit_log_print_value(log_file, insn.length() * 8, insn.bits()); + fprintf(log_file, ")"); + bool show_vec = false; + + for (auto item : reg) { + if (item.first == 0) +
continue; + + char prefix; + int size; + int rd = item.first >> 4; + bool is_vec = false; + bool is_vreg = false; + switch (item.first & 0xf) { + case 0: + size = xlen; + prefix = 'x'; + break; + case 1: + size = flen; + prefix = 'f'; + break; + case 2: + size = p->VU.VLEN; + prefix = 'v'; + is_vreg = true; + break; + case 3: + is_vec = true; + break; + case 4: + size = xlen; + prefix = 'c'; + break; + default: + assert("can't been here" && 0); + break; + } + + if (!show_vec && (is_vreg || is_vec)) { + fprintf(log_file, " e%ld %s%ld l%ld", + p->VU.vsew, + p->VU.vflmul < 1 ? "mf" : "m", + p->VU.vflmul < 1 ? (reg_t)(1 / p->VU.vflmul) : (reg_t)p->VU.vflmul, + p->VU.vl); + show_vec = true; + } + + if (!is_vec) { + if (prefix == 'c') + fprintf(log_file, " c%d_%s ", rd, csr_name(rd)); + else + fprintf(log_file, " %c%2d ", prefix, rd); + if (is_vreg) + commit_log_print_value(log_file, size, &p->VU.elt(rd, 0)); + else + commit_log_print_value(log_file, size, item.second.v); + } } - reg.addr = 0; -#endif + + for (auto item : load) { + fprintf(log_file, " mem "); + commit_log_print_value(log_file, xlen, std::get<0>(item)); + } + + for (auto item : store) { + fprintf(log_file, " mem "); + commit_log_print_value(log_file, xlen, std::get<0>(item)); + fprintf(log_file, " "); + commit_log_print_value(log_file, std::get<2>(item) << 3, std::get<1>(item)); + } + fprintf(log_file, "\n"); } +#else +static void commit_log_reset(processor_t* p) {} +static void commit_log_stash_privilege(processor_t* p) {} +static void commit_log_print_insn(processor_t* p, reg_t pc, insn_t insn) {} +#endif inline void processor_t::update_histogram(reg_t pc) { @@ -74,26 +173,55 @@ inline void processor_t::update_histogram(reg_t pc) // function calls. 
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch) { + commit_log_reset(p); commit_log_stash_privilege(p); - reg_t npc = fetch.func(p, fetch.insn, pc); - if (npc != PC_SERIALIZE_BEFORE) { - commit_log_print_insn(p->get_state(), pc, fetch.insn); - p->update_histogram(pc); + reg_t npc; + + try { + npc = fetch.func(p, fetch.insn, pc); + if (npc != PC_SERIALIZE_BEFORE) { + +#ifdef RISCV_ENABLE_COMMITLOG + if (p->get_log_commits_enabled()) { + commit_log_print_insn(p, pc, fetch.insn); + } +#endif + + } +#ifdef RISCV_ENABLE_COMMITLOG + } catch(mem_trap_t& t) { + //handle segfault in middle of vector load/store + if (p->get_log_commits_enabled()) { + for (auto item : p->get_state()->log_reg_write) { + if ((item.first & 3) == 3) { + commit_log_print_insn(p, pc, fetch.insn); + break; + } + } + } + throw; +#endif + } catch(...) { + throw; } + p->update_histogram(pc); + return npc; } bool processor_t::slow_path() { - return debug || state.single_step != state.STEP_NONE || state.dcsr.cause; + return debug || state.single_step != state.STEP_NONE || state.debug_mode; } // fetch/decode/execute loop void processor_t::step(size_t n) { - if (state.dcsr.cause == DCSR_CAUSE_NONE) { - if (halt_request) { + if (!state.debug_mode) { + if (halt_request == HR_REGULAR) { enter_debug_mode(DCSR_CAUSE_DEBUGINT); + } else if (halt_request == HR_GROUP) { + enter_debug_mode(DCSR_CAUSE_GROUP); } // !!!The halt bit in DCSR is deprecated. else if (state.dcsr.halt) { enter_debug_mode(DCSR_CAUSE_HALT); @@ -130,7 +258,7 @@ void processor_t::step(size_t n) { if (unlikely(!state.serialized && state.single_step == state.STEP_STEPPED)) { state.single_step = state.STEP_NONE; - if (state.dcsr.cause == DCSR_CAUSE_NONE) { + if (!state.debug_mode) { enter_debug_mode(DCSR_CAUSE_STEP); // enter_debug_mode changed state.pc, so we can't just continue.
break; @@ -145,15 +273,8 @@ void processor_t::step(size_t n) if (debug && !state.serialized) disasm(fetch.insn); pc = execute_insn(this, pc, fetch); - + pc = this->hwLoops.handle_loops(state.pc, pc, fetch.insn); advance_pc(); - - if (unlikely(state.pc >= DEBUG_ROM_ENTRY && - state.pc < DEBUG_END)) { - // We're waiting for the debugger to tell us something. - return; - } - } } else while (instret < n) @@ -185,9 +306,13 @@ void processor_t::step(size_t n) // This macro is included in "icache.h" included within the switch // statement below. The indirect jump corresponding to the instruction // is located within the execute_insn() function call. + + // Todo: Is it a good idea to add hwloops here or is forcing slow-path better? + // trade-off between speed of hwloops and speed of everything else #define ICACHE_ACCESS(i) { \ insn_fetch_t fetch = ic_entry->data; \ pc = execute_insn(this, pc, fetch); \ + pc = this->hwLoops.handle_loops(state.pc, pc, fetch.insn); \ ic_entry = ic_entry->next; \ if (i == mmu_t::ICACHE_ENTRIES-1) break; \ if (unlikely(ic_entry->tag != pc)) break; \ @@ -236,7 +361,7 @@ void processor_t::step(size_t n) enter_debug_mode(DCSR_CAUSE_HWBP); break; case ACTION_DEBUG_EXCEPTION: { - mem_trap_t trap(CAUSE_BREAKPOINT, t.address); + insn_trap_t trap(CAUSE_BREAKPOINT, t.address); take_trap(trap, pc); break; } @@ -244,6 +369,16 @@ void processor_t::step(size_t n) abort(); } } + catch (wait_for_interrupt_t &t) + { + // Return to the outer simulation loop, which gives other devices/harts a + // chance to generate interrupts. + // + // In the debug ROM this prevents us from wasting time looping, but also + // allows us to switch to other threads only once per idle loop in case + // there is activity. 
+ n = instret; + } state.minstret += instret; n -= instret; diff --git a/riscv/extensions.cc b/riscv/extensions.cc index d1690c4b8f..347dc5e915 100644 --- a/riscv/extensions.cc +++ b/riscv/extensions.cc @@ -21,14 +21,23 @@ std::function find_extension(const char* name) if (!extensions().count(name)) { // try to find extension xyz by loading libxyz.so std::string libname = std::string("lib") + name + ".so"; - if (!dlopen(libname.c_str(), RTLD_LAZY)) { - fprintf(stderr, "couldn't find extension '%s' (or library '%s')\n", - name, libname.c_str()); - exit(-1); + std::string libdefault = "libcustomext.so"; + bool is_default = false; + auto dlh = dlopen(libname.c_str(), RTLD_LAZY); + if (!dlh) { + dlh = dlopen(libdefault.c_str(), RTLD_LAZY); + if (!dlh) { + fprintf(stderr, "couldn't find shared library either '%s' or '%s')\n", + libname.c_str(), libdefault.c_str()); + exit(-1); + } + + is_default = true; } + if (!extensions().count(name)) { fprintf(stderr, "couldn't find extension '%s' in shared library '%s'\n", - name, libname.c_str()); + name, is_default ? libdefault.c_str() : libname.c_str()); exit(-1); } } diff --git a/riscv/gen_icache b/riscv/gen_icache index 7ec3c69434..67c0d69f1a 100755 --- a/riscv/gen_icache +++ b/riscv/gen_icache @@ -1,7 +1,8 @@ #!/bin/sh -n=$(($1-1)) -for i in `seq 0 $n` +i=0 +while [ $i -lt $1 ] do echo case $i: ICACHE_ACCESS\($i\)\; + i=$((i+1)) done echo diff --git a/riscv/insn_template.h b/riscv/insn_template.h index 07aa16ba05..3c36d10e52 100644 --- a/riscv/insn_template.h +++ b/riscv/insn_template.h @@ -1,7 +1,7 @@ // See LICENSE for license details. 
+#include "arith.h" #include "mmu.h" -#include "mulhi.h" #include "softfloat.h" #include "internals.h" #include "specialize.h" diff --git a/riscv/insns/c_ebreak.h b/riscv/insns/c_ebreak.h index 128b86b22c..1c36b2418b 100644 --- a/riscv/insns/c_ebreak.h +++ b/riscv/insns/c_ebreak.h @@ -1,2 +1,2 @@ require_extension('C'); -throw trap_breakpoint(pc); +throw trap_breakpoint(0); diff --git a/riscv/insns/csrrc.h b/riscv/insns/csrrc.h index 0472d80efd..37384b0e52 100644 --- a/riscv/insns/csrrc.h +++ b/riscv/insns/csrrc.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old & ~RS1); } diff --git a/riscv/insns/csrrci.h b/riscv/insns/csrrci.h index 4d83cc0617..ad40c8f4c2 100644 --- a/riscv/insns/csrrci.h +++ b/riscv/insns/csrrci.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old & ~(reg_t)insn.rs1()); } diff --git a/riscv/insns/csrrs.h b/riscv/insns/csrrs.h index 4e8bde9637..91fcc7a347 100644 --- a/riscv/insns/csrrs.h +++ b/riscv/insns/csrrs.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old | RS1); } diff --git a/riscv/insns/csrrsi.h b/riscv/insns/csrrsi.h index b673725b54..f348e570bd 100644 --- a/riscv/insns/csrrsi.h +++ b/riscv/insns/csrrsi.h @@ -1,6 +1,6 @@ bool write = insn.rs1() != 0; int csr = validate_csr(insn.csr(), write); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, write); if (write) { p->set_csr(csr, old | insn.rs1()); } diff --git a/riscv/insns/csrrw.h b/riscv/insns/csrrw.h index e45420b570..cc0c28dc95 100644 --- a/riscv/insns/csrrw.h +++ b/riscv/insns/csrrw.h @@ -1,5 +1,5 @@ int csr = validate_csr(insn.csr(), 
true); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, true); p->set_csr(csr, RS1); WRITE_RD(sext_xlen(old)); serialize(); diff --git a/riscv/insns/csrrwi.h b/riscv/insns/csrrwi.h index decadf4121..4d5d06468b 100644 --- a/riscv/insns/csrrwi.h +++ b/riscv/insns/csrrwi.h @@ -1,5 +1,5 @@ int csr = validate_csr(insn.csr(), true); -reg_t old = p->get_csr(csr); +reg_t old = p->get_csr(csr, insn, true); p->set_csr(csr, insn.rs1()); WRITE_RD(sext_xlen(old)); serialize(); diff --git a/riscv/insns/dret.h b/riscv/insns/dret.h index 35c19cb8a2..ba503a0c7f 100644 --- a/riscv/insns/dret.h +++ b/riscv/insns/dret.h @@ -1,9 +1,9 @@ -require_privilege(PRV_M); +require(STATE.debug_mode); set_pc_and_serialize(STATE.dpc); p->set_privilege(STATE.dcsr.prv); /* We're not in Debug Mode anymore. */ -STATE.dcsr.cause = 0; +STATE.debug_mode = false; if (STATE.dcsr.step) STATE.single_step = STATE.STEP_STEPPING; diff --git a/riscv/insns/ebreak.h b/riscv/insns/ebreak.h index 736cebef4b..f123f9544d 100644 --- a/riscv/insns/ebreak.h +++ b/riscv/insns/ebreak.h @@ -1 +1 @@ -throw trap_breakpoint(pc); +throw trap_breakpoint(0); diff --git a/riscv/insns/ecall.h b/riscv/insns/ecall.h index e298ac722b..e6c723f4e3 100644 --- a/riscv/insns/ecall.h +++ b/riscv/insns/ecall.h @@ -1,7 +1,11 @@ switch (STATE.prv) { case PRV_U: throw trap_user_ecall(); - case PRV_S: throw trap_supervisor_ecall(); + case PRV_S: + if (STATE.v) + throw trap_virtual_supervisor_ecall(); + else + throw trap_supervisor_ecall(); case PRV_M: throw trap_machine_ecall(); default: abort(); } diff --git a/riscv/insns/fadd_h.h b/riscv/insns/fadd_h.h new file mode 100644 index 0000000000..2b646ae77b --- /dev/null +++ b/riscv/insns/fadd_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_add(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fclass_h.h b/riscv/insns/fclass_h.h new file mode 100644 index 0000000000..066a2d24d6 --- /dev/null +++ 
b/riscv/insns/fclass_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_classify(f16(FRS1))); diff --git a/riscv/insns/fcvt_d_h.h b/riscv/insns/fcvt_d_h.h new file mode 100644 index 0000000000..6906fc06c1 --- /dev/null +++ b/riscv/insns/fcvt_d_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('D'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f64(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_d.h b/riscv/insns/fcvt_h_d.h new file mode 100644 index 0000000000..f463dd58e6 --- /dev/null +++ b/riscv/insns/fcvt_h_d.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('D'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f64_to_f16(f64(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_l.h b/riscv/insns/fcvt_h_l.h new file mode 100644 index 0000000000..39178c2fd3 --- /dev/null +++ b/riscv/insns/fcvt_h_l.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(i64_to_f16(RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_lu.h b/riscv/insns/fcvt_h_lu.h new file mode 100644 index 0000000000..a872c48091 --- /dev/null +++ b/riscv/insns/fcvt_h_lu.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(ui64_to_f16(RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_q.h b/riscv/insns/fcvt_h_q.h new file mode 100644 index 0000000000..94b0001635 --- /dev/null +++ b/riscv/insns/fcvt_h_q.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('Q'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f128_to_f16(f128(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_s.h b/riscv/insns/fcvt_h_s.h new file mode 100644 index 0000000000..eb928e9785 --- /dev/null +++ b/riscv/insns/fcvt_h_s.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f32_to_f16(f32(FRS1))); +set_fp_exceptions; 
diff --git a/riscv/insns/fcvt_h_w.h b/riscv/insns/fcvt_h_w.h new file mode 100644 index 0000000000..c08245451f --- /dev/null +++ b/riscv/insns/fcvt_h_w.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(i32_to_f16((int32_t)RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_h_wu.h b/riscv/insns/fcvt_h_wu.h new file mode 100644 index 0000000000..9f2f5f6a7c --- /dev/null +++ b/riscv/insns/fcvt_h_wu.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(ui32_to_f16((uint32_t)RS1)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_l_h.h b/riscv/insns/fcvt_l_h.h new file mode 100644 index 0000000000..5a1fea850d --- /dev/null +++ b/riscv/insns/fcvt_l_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(f16_to_i64(f16(FRS1), RM, true)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_lu_h.h b/riscv/insns/fcvt_lu_h.h new file mode 100644 index 0000000000..f1454c3e99 --- /dev/null +++ b/riscv/insns/fcvt_lu_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_rv64; +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(f16_to_ui64(f16(FRS1), RM, true)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_q_h.h b/riscv/insns/fcvt_q_h.h new file mode 100644 index 0000000000..8a5f6805c5 --- /dev/null +++ b/riscv/insns/fcvt_q_h.h @@ -0,0 +1,6 @@ +require_extension(EXT_ZFH); +require_extension('Q'); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f128(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_s_h.h b/riscv/insns/fcvt_s_h.h new file mode 100644 index 0000000000..bfa2e91497 --- /dev/null +++ b/riscv/insns/fcvt_s_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_to_f32(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_w_h.h b/riscv/insns/fcvt_w_h.h new file mode 100644 index 0000000000..fe8bb48fb2 --- /dev/null +++ 
b/riscv/insns/fcvt_w_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(sext32(f16_to_i32(f16(FRS1), RM, true))); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_wu_h.h b/riscv/insns/fcvt_wu_h.h new file mode 100644 index 0000000000..bf6648d3c6 --- /dev/null +++ b/riscv/insns/fcvt_wu_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_RD(sext32(f16_to_ui32(f16(FRS1), RM, true))); +set_fp_exceptions; diff --git a/riscv/insns/fdiv_h.h b/riscv/insns/fdiv_h.h new file mode 100644 index 0000000000..a169eae83a --- /dev/null +++ b/riscv/insns/fdiv_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_div(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/feq_h.h b/riscv/insns/feq_h.h new file mode 100644 index 0000000000..47e75a5b92 --- /dev/null +++ b/riscv/insns/feq_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_eq(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fle_h.h b/riscv/insns/fle_h.h new file mode 100644 index 0000000000..9fc5968532 --- /dev/null +++ b/riscv/insns/fle_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_le(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/flh.h b/riscv/insns/flh.h new file mode 100644 index 0000000000..c887999398 --- /dev/null +++ b/riscv/insns/flh.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16(MMU.load_uint16(RS1 + insn.i_imm()))); diff --git a/riscv/insns/flt_h.h b/riscv/insns/flt_h.h new file mode 100644 index 0000000000..f516a38a62 --- /dev/null +++ b/riscv/insns/flt_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(f16_lt(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmadd_h.h b/riscv/insns/fmadd_h.h new file mode 100644 index 0000000000..6551de5e30 --- /dev/null +++ b/riscv/insns/fmadd_h.h @@ 
-0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(FRS1), f16(FRS2), f16(FRS3))); +set_fp_exceptions; diff --git a/riscv/insns/fmax_h.h b/riscv/insns/fmax_h.h new file mode 100644 index 0000000000..3d4c40ebf9 --- /dev/null +++ b/riscv/insns/fmax_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16_max(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmin_h.h b/riscv/insns/fmin_h.h new file mode 100644 index 0000000000..5fb1404fe0 --- /dev/null +++ b/riscv/insns/fmin_h.h @@ -0,0 +1,4 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16_min(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmsub_h.h b/riscv/insns/fmsub_h.h new file mode 100644 index 0000000000..934291fc81 --- /dev/null +++ b/riscv/insns/fmsub_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(FRS1), f16(FRS2), f16(f16(FRS3).v ^ F16_SIGN))); +set_fp_exceptions; diff --git a/riscv/insns/fmul_h.h b/riscv/insns/fmul_h.h new file mode 100644 index 0000000000..0152df8f09 --- /dev/null +++ b/riscv/insns/fmul_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mul(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/fmv_h_x.h b/riscv/insns/fmv_h_x.h new file mode 100644 index 0000000000..c022508e24 --- /dev/null +++ b/riscv/insns/fmv_h_x.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(f16(RS1)); diff --git a/riscv/insns/fmv_x_h.h b/riscv/insns/fmv_x_h.h new file mode 100644 index 0000000000..5e89c4f0f0 --- /dev/null +++ b/riscv/insns/fmv_x_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_RD(sext32((int16_t)(FRS1.v[0]))); diff --git a/riscv/insns/fnmadd_h.h b/riscv/insns/fnmadd_h.h new file mode 100644 index 0000000000..e4c619e77a --- /dev/null +++ b/riscv/insns/fnmadd_h.h @@ -0,0 +1,5 @@ 
+require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(f16(FRS1).v ^ F16_SIGN), f16(FRS2), f16(f16(FRS3).v ^ F16_SIGN))); +set_fp_exceptions; diff --git a/riscv/insns/fnmsub_h.h b/riscv/insns/fnmsub_h.h new file mode 100644 index 0000000000..0410c3bba6 --- /dev/null +++ b/riscv/insns/fnmsub_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_mulAdd(f16(f16(FRS1).v ^ F16_SIGN), f16(FRS2), f16(FRS3))); +set_fp_exceptions; diff --git a/riscv/insns/fsgnj_h.h b/riscv/insns/fsgnj_h.h new file mode 100644 index 0000000000..79d50f5fa3 --- /dev/null +++ b/riscv/insns/fsgnj_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, false, false)); diff --git a/riscv/insns/fsgnjn_h.h b/riscv/insns/fsgnjn_h.h new file mode 100644 index 0000000000..ebb4ac9f50 --- /dev/null +++ b/riscv/insns/fsgnjn_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, true, false)); diff --git a/riscv/insns/fsgnjx_h.h b/riscv/insns/fsgnjx_h.h new file mode 100644 index 0000000000..9310269545 --- /dev/null +++ b/riscv/insns/fsgnjx_h.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +WRITE_FRD(fsgnj16(FRS1, FRS2, false, true)); diff --git a/riscv/insns/fsh.h b/riscv/insns/fsh.h new file mode 100644 index 0000000000..b9fa4e0557 --- /dev/null +++ b/riscv/insns/fsh.h @@ -0,0 +1,3 @@ +require_extension(EXT_ZFH); +require_fp; +MMU.store_uint16(RS1 + insn.s_imm(), FRS2.v[0]); diff --git a/riscv/insns/fsqrt_h.h b/riscv/insns/fsqrt_h.h new file mode 100644 index 0000000000..138d572744 --- /dev/null +++ b/riscv/insns/fsqrt_h.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_sqrt(f16(FRS1))); +set_fp_exceptions; diff --git a/riscv/insns/fsub_h.h b/riscv/insns/fsub_h.h new file mode 100644 index 0000000000..43b51cc2eb --- /dev/null +++ b/riscv/insns/fsub_h.h @@ -0,0 +1,5 @@ 
+require_extension(EXT_ZFH); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD(f16_sub(f16(FRS1), f16(FRS2))); +set_fp_exceptions; diff --git a/riscv/insns/hfence_gvma.h b/riscv/insns/hfence_gvma.h new file mode 100644 index 0000000000..f1996d921f --- /dev/null +++ b/riscv/insns/hfence_gvma.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +MMU.flush_tlb(); diff --git a/riscv/insns/hfence_vvma.h b/riscv/insns/hfence_vvma.h new file mode 100644 index 0000000000..ecd42c198c --- /dev/null +++ b/riscv/insns/hfence_vvma.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(PRV_S); +MMU.flush_tlb(); diff --git a/riscv/insns/hlv_b.h b/riscv/insns/hlv_b.h new file mode 100644 index 0000000000..86192c6328 --- /dev/null +++ b/riscv/insns/hlv_b.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int8(RS1)); diff --git a/riscv/insns/hlv_bu.h b/riscv/insns/hlv_bu.h new file mode 100644 index 0000000000..2f951947d0 --- /dev/null +++ b/riscv/insns/hlv_bu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint8(RS1)); diff --git a/riscv/insns/hlv_d.h b/riscv/insns/hlv_d.h new file mode 100644 index 0000000000..1bbd0277b4 --- /dev/null +++ b/riscv/insns/hlv_d.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int64(RS1)); diff --git a/riscv/insns/hlv_h.h b/riscv/insns/hlv_h.h new file mode 100644 index 0000000000..6825fe46bd --- /dev/null +++ b/riscv/insns/hlv_h.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? 
PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int16(RS1)); diff --git a/riscv/insns/hlv_hu.h b/riscv/insns/hlv_hu.h new file mode 100644 index 0000000000..3d9d98e2f4 --- /dev/null +++ b/riscv/insns/hlv_hu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint16(RS1)); diff --git a/riscv/insns/hlv_w.h b/riscv/insns/hlv_w.h new file mode 100644 index 0000000000..be420d37c5 --- /dev/null +++ b/riscv/insns/hlv_w.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_int32(RS1)); diff --git a/riscv/insns/hlv_wu.h b/riscv/insns/hlv_wu.h new file mode 100644 index 0000000000..851be27c6a --- /dev/null +++ b/riscv/insns/hlv_wu.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_uint32(RS1)); diff --git a/riscv/insns/hlvx_hu.h b/riscv/insns/hlvx_hu.h new file mode 100644 index 0000000000..19dbcfbf63 --- /dev/null +++ b/riscv/insns/hlvx_hu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_x_uint16(RS1)); diff --git a/riscv/insns/hlvx_wu.h b/riscv/insns/hlvx_wu.h new file mode 100644 index 0000000000..4dfe702350 --- /dev/null +++ b/riscv/insns/hlvx_wu.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +WRITE_RD(MMU.guest_load_x_uint32(RS1)); diff --git a/riscv/insns/hsv_b.h b/riscv/insns/hsv_b.h new file mode 100644 index 0000000000..a5c34ff072 --- /dev/null +++ b/riscv/insns/hsv_b.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? 
PRV_U : PRV_S); +MMU.guest_store_uint8(RS1, RS2); diff --git a/riscv/insns/hsv_d.h b/riscv/insns/hsv_d.h new file mode 100644 index 0000000000..14c6d5d913 --- /dev/null +++ b/riscv/insns/hsv_d.h @@ -0,0 +1,5 @@ +require_extension('H'); +require_rv64; +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint64(RS1, RS2); diff --git a/riscv/insns/hsv_h.h b/riscv/insns/hsv_h.h new file mode 100644 index 0000000000..1cfe77aae8 --- /dev/null +++ b/riscv/insns/hsv_h.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint16(RS1, RS2); diff --git a/riscv/insns/hsv_w.h b/riscv/insns/hsv_w.h new file mode 100644 index 0000000000..d54f6731ee --- /dev/null +++ b/riscv/insns/hsv_w.h @@ -0,0 +1,4 @@ +require_extension('H'); +require_novirt(); +require_privilege(get_field(STATE.hstatus, HSTATUS_HU) ? PRV_U : PRV_S); +MMU.guest_store_uint32(RS1, RS2); diff --git a/riscv/insns/lp_count.h b/riscv/insns/lp_count.h new file mode 100644 index 0000000000..99c8099882 --- /dev/null +++ b/riscv/insns/lp_count.h @@ -0,0 +1,4 @@ +reg_t num_iter = zext_xlen(RS1); +bool loopNr = insn.p_loop(); + +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_counti.h b/riscv/insns/lp_counti.h new file mode 100644 index 0000000000..6c78ceaa89 --- /dev/null +++ b/riscv/insns/lp_counti.h @@ -0,0 +1,4 @@ +reg_t num_iter = zext_xlen(insn.p_uimmL()); +bool loopNr = insn.p_loop(); + +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_endi.h b/riscv/insns/lp_endi.h new file mode 100644 index 0000000000..74d155f95d --- /dev/null +++ b/riscv/insns/lp_endi.h @@ -0,0 +1,6 @@ +reg_t offset = zext_xlen(insn.p_uimmL() << 1); +bool loopNr = insn.p_loop(); + +reg_t end_addr = pc + offset; + +p->hwLoops.set_end(loopNr, end_addr); diff --git a/riscv/insns/lp_setup.h b/riscv/insns/lp_setup.h new file mode 100644 index 
0000000000..2c29a1f4ca --- /dev/null +++ b/riscv/insns/lp_setup.h @@ -0,0 +1,10 @@ +reg_t num_iter = zext_xlen(RS1); +reg_t offset = zext_xlen(insn.p_uimmL() << 1); +bool loopNr = insn.p_loop(); + +reg_t start_addr = npc; // next pc (pc+4) +reg_t end_addr = pc + offset; + +p->hwLoops.set_end(loopNr, end_addr); +p->hwLoops.set_start(loopNr, start_addr); +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_setupi.h b/riscv/insns/lp_setupi.h new file mode 100644 index 0000000000..f824b7bcf4 --- /dev/null +++ b/riscv/insns/lp_setupi.h @@ -0,0 +1,10 @@ +reg_t num_iter = zext_xlen(insn.p_uimmL()); +reg_t offset = zext_xlen(insn.p_uimmS() << 1); +bool loopNr = insn.p_loop(); + +reg_t start_addr = npc; // next pc (pc+4) +reg_t end_addr = pc + offset; + +p->hwLoops.set_end(loopNr, end_addr); +p->hwLoops.set_start(loopNr, start_addr); +p->hwLoops.set_count(loopNr, num_iter); diff --git a/riscv/insns/lp_starti.h b/riscv/insns/lp_starti.h new file mode 100644 index 0000000000..09b3acd96f --- /dev/null +++ b/riscv/insns/lp_starti.h @@ -0,0 +1,6 @@ +reg_t offset = zext_xlen(insn.p_uimmL() << 1); +bool loopNr = insn.p_loop(); + +reg_t start_addr = pc + offset; + +p->hwLoops.set_start(loopNr, start_addr); diff --git a/riscv/insns/lr_d.h b/riscv/insns/lr_d.h index 52090c31b8..3f3521be54 100644 --- a/riscv/insns/lr_d.h +++ b/riscv/insns/lr_d.h @@ -1,4 +1,5 @@ require_extension('A'); require_rv64; +auto res = MMU.load_int64(RS1); MMU.acquire_load_reservation(RS1); -WRITE_RD(MMU.load_int64(RS1)); +WRITE_RD(res); diff --git a/riscv/insns/lr_w.h b/riscv/insns/lr_w.h index c5845a68e1..8605cc5df9 100644 --- a/riscv/insns/lr_w.h +++ b/riscv/insns/lr_w.h @@ -1,3 +1,4 @@ require_extension('A'); +auto res = MMU.load_int32(RS1); MMU.acquire_load_reservation(RS1); -WRITE_RD(MMU.load_int32(RS1)); +WRITE_RD(res); diff --git a/riscv/insns/mret.h b/riscv/insns/mret.h index 96933cf672..cedfc72840 100644 --- a/riscv/insns/mret.h +++ b/riscv/insns/mret.h @@ -2,8 +2,10 @@ 
require_privilege(PRV_M); set_pc_and_serialize(p->get_state()->mepc); reg_t s = STATE.mstatus; reg_t prev_prv = get_field(s, MSTATUS_MPP); +reg_t prev_virt = get_field(s, MSTATUS_MPV); s = set_field(s, MSTATUS_MIE, get_field(s, MSTATUS_MPIE)); s = set_field(s, MSTATUS_MPIE, 1); s = set_field(s, MSTATUS_MPP, PRV_U); -p->set_privilege(prev_prv); p->set_csr(CSR_MSTATUS, s); +p->set_privilege(prev_prv); +p->set_virt(prev_virt); diff --git a/riscv/insns/p_abs.h b/riscv/insns/p_abs.h new file mode 100644 index 0000000000..409446b218 --- /dev/null +++ b/riscv/insns/p_abs.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) > 0) + WRITE_RD(RS1); +else + WRITE_RD(-RS1); diff --git a/riscv/insns/p_addN.h b/riscv/insns/p_addN.h new file mode 100644 index 0000000000..303363ee9c --- /dev/null +++ b/riscv/insns/p_addN.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); + +sreg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addNr.h b/riscv/insns/p_addNr.h new file mode 100644 index 0000000000..22018dd3d0 --- /dev/null +++ b/riscv/insns/p_addNr.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +sreg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addRN.h b/riscv/insns/p_addRN.h new file mode 100644 index 0000000000..7fe4082288 --- /dev/null +++ b/riscv/insns/p_addRN.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_addRNr.h b/riscv/insns/p_addRNr.h new file mode 100644 index 0000000000..a41ec1569f --- /dev/null +++ b/riscv/insns/p_addRNr.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = 
((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_adduN.h b/riscv/insns/p_adduN.h new file mode 100644 index 0000000000..d1844e2ebf --- /dev/null +++ b/riscv/insns/p_adduN.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); + +reg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_adduNr.h b/riscv/insns/p_adduNr.h new file mode 100644 index 0000000000..e0205d2571 --- /dev/null +++ b/riscv/insns/p_adduNr.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +reg_t res = (term1 + term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_adduRN.h b/riscv/insns/p_adduRN.h new file mode 100644 index 0000000000..53734b3c98 --- /dev/null +++ b/riscv/insns/p_adduRN.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_adduRNr.h b/riscv/insns/p_adduRNr.h new file mode 100644 index 0000000000..7ab19b3cd3 --- /dev/null +++ b/riscv/insns/p_adduRNr.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 + term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_bclr.h b/riscv/insns/p_bclr.h new file mode 100644 index 0000000000..b8244e6fc7 --- /dev/null +++ b/riscv/insns/p_bclr.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +reg_t bit_mask = ( (((reg_t)1<>1)%2; + res |= (val<<1)%4; + break; + case 1: // res[1:0] = val[1:0] + res = val%4; + break; + default: + res = 0; + break; +} +val >>=2; // remove LSBs from val (prep first block) + + +reg_t temp; +for(int i=0; i<5; i++) +{ + res <<=6; // shift finished part to safety + temp = (val & 0x3F) * mult_masks[group]; // create copies + temp = temp & and_masks[group]; // select bits from copies + res |= (temp % 255); // collapse selected bits together + val >>=6; // prep next block +} + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_bneimm.h b/riscv/insns/p_bneimm.h new file mode 100644 index 0000000000..ef565ccfd4 --- /dev/null +++ b/riscv/insns/p_bneimm.h @@ -0,0 +1,2 @@ +if(sreg_t(RS1) != insn.p_simm5()) + set_pc(BRANCH_TARGET); diff --git a/riscv/insns/p_bset.h b/riscv/insns/p_bset.h new file mode 100644 index 0000000000..d7fec87de5 --- /dev/null +++ b/riscv/insns/p_bset.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +reg_t set_mask = ( (((reg_t)1< rd = 0, not 32 + } 
+#else + if(val != 0x00) // rs1 = 0 -> rd = 0, not 32 + { + if(val >= ((reg_t)1 << 31)) { + // turn leading 1s into leading 0s + val = ~val; + } + val <<= 1; // to distinguish -1 from -2 + + // modified log2() from standfords bithacks (find highest '1') + const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + const unsigned int S[] = {1, 2, 4, 8, 16}; + for (int i = 4; i >= 0; i--) + { + if (val & b[i]) + { + val >>= S[i]; + cnt |= S[i]; + } + } + + cnt = 32 - cnt; // pos of MSB+1 to #leading bits + } + +#endif + +WRITE_RD(cnt); diff --git a/riscv/insns/p_clip.h b/riscv/insns/p_clip.h new file mode 100644 index 0000000000..4cc255f00d --- /dev/null +++ b/riscv/insns/p_clip.h @@ -0,0 +1,9 @@ +sreg_t clip_lower = insn.p_zimm5() ? -(1 << (insn.p_zimm5() - 1)) : -1; +sreg_t clip_upper = insn.p_zimm5() ? ((1 << (insn.p_zimm5() - 1)) - 1) : 0; + +if(sreg_t(RS1) <= clip_lower) + WRITE_RD(clip_lower); +else if(sreg_t(RS1) >= clip_upper) + WRITE_RD(clip_upper); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipr.h b/riscv/insns/p_clipr.h new file mode 100644 index 0000000000..abe0846e57 --- /dev/null +++ b/riscv/insns/p_clipr.h @@ -0,0 +1,6 @@ +if(sreg_t(RS1) <= -(sreg_t(RS2) + 1)) + WRITE_RD(-(sreg_t(RS2) + 1)); +else if(sreg_t(RS1) >= sreg_t(RS2)) + WRITE_RD(sreg_t(RS2)); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipu.h b/riscv/insns/p_clipu.h new file mode 100644 index 0000000000..aa4da5858e --- /dev/null +++ b/riscv/insns/p_clipu.h @@ -0,0 +1,8 @@ +sreg_t clipu_upper = insn.p_zimm5() ? 
((1 << (insn.p_zimm5() - 1)) - 1) : 0; + +if(sreg_t(RS1) <= 0) + WRITE_RD(0); +else if(sreg_t(RS1) >= clipu_upper) + WRITE_RD(clipu_upper); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_clipur.h b/riscv/insns/p_clipur.h new file mode 100644 index 0000000000..08c03703a4 --- /dev/null +++ b/riscv/insns/p_clipur.h @@ -0,0 +1,6 @@ +if(sreg_t(RS1) <= 0) + WRITE_RD(0); +else if(sreg_t(RS1) >= sreg_t(RS2)) + WRITE_RD(sreg_t(RS2)); +else + WRITE_RD(sreg_t(RS1)); diff --git a/riscv/insns/p_cnt.h b/riscv/insns/p_cnt.h new file mode 100644 index 0000000000..7ffc93ad2b --- /dev/null +++ b/riscv/insns/p_cnt.h @@ -0,0 +1,14 @@ +reg_t val = zext_xlen(RS1); +reg_t cnt; + +#ifdef __GNUC__ + // "Returns the number of 1-bits in x." + cnt = __builtin_popcount(val); +#else + cnt = 0; + for(cnt = 0; val != 0x00; ++cnt) + { + val &= val - 1; + } +#endif +WRITE_RD(cnt); diff --git a/riscv/insns/p_extbs.h b/riscv/insns/p_extbs.h new file mode 100644 index 0000000000..de3e100e89 --- /dev/null +++ b/riscv/insns/p_extbs.h @@ -0,0 +1 @@ +WRITE_RD(sext8(RS1)); diff --git a/riscv/insns/p_extbz.h b/riscv/insns/p_extbz.h new file mode 100644 index 0000000000..419622cbaf --- /dev/null +++ b/riscv/insns/p_extbz.h @@ -0,0 +1 @@ +WRITE_RD(zext8(RS1)); diff --git a/riscv/insns/p_exths.h b/riscv/insns/p_exths.h new file mode 100644 index 0000000000..16cbe7b5a7 --- /dev/null +++ b/riscv/insns/p_exths.h @@ -0,0 +1 @@ +WRITE_RD(sext16(RS1)); diff --git a/riscv/insns/p_exthz.h b/riscv/insns/p_exthz.h new file mode 100644 index 0000000000..8b548b6ffa --- /dev/null +++ b/riscv/insns/p_exthz.h @@ -0,0 +1 @@ +WRITE_RD(zext16(RS1)); diff --git a/riscv/insns/p_extract.h b/riscv/insns/p_extract.h new file mode 100644 index 0000000000..c45ffde0a8 --- /dev/null +++ b/riscv/insns/p_extract.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +sreg_t res = val >> first; +res = vsext(res, upto+1); + +WRITE_RD(res); diff --git 
a/riscv/insns/p_extractr.h b/riscv/insns/p_extractr.h new file mode 100644 index 0000000000..db131ee423 --- /dev/null +++ b/riscv/insns/p_extractr.h @@ -0,0 +1,8 @@ +sreg_t val = sext_xlen(RS1); +reg_t first = zextr(RS2, 4, 0); // rs1[4:0] +reg_t upto = zextr(RS2, 9, 5); // rs1[9:5] + +sreg_t res = val >> first; +res = vsext(res, upto+1); + +WRITE_RD(res); diff --git a/riscv/insns/p_extractu.h b/riscv/insns/p_extractu.h new file mode 100644 index 0000000000..60b5f78870 --- /dev/null +++ b/riscv/insns/p_extractu.h @@ -0,0 +1,8 @@ +sreg_t val = zext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +sreg_t res = val >> first; +res = vzext(res, upto+1); + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_extractur.h b/riscv/insns/p_extractur.h new file mode 100644 index 0000000000..eb30730b7c --- /dev/null +++ b/riscv/insns/p_extractur.h @@ -0,0 +1,8 @@ +sreg_t val = zext_xlen(RS1); +reg_t first = zextr(RS2, 4, 0); // rs1[4:0] +reg_t upto = zextr(RS2, 9, 5); // rs1[9:5] + +sreg_t res = val >> first; +res = vzext(res, upto+1); + +WRITE_RD(sext_xlen(res)); \ No newline at end of file diff --git a/riscv/insns/p_ff1.h b/riscv/insns/p_ff1.h new file mode 100644 index 0000000000..8b8a0dc519 --- /dev/null +++ b/riscv/insns/p_ff1.h @@ -0,0 +1,24 @@ +reg_t val = zext_xlen(RS1); +reg_t cnt; + +#ifdef __GNUC__ + // "Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero. 
" + cnt = __builtin_ffs(val); + if(cnt == 0) { + cnt = 32; // rs = 0 -> rd = 32 + } else { + cnt--; + } +#else + // count trailing zero bits from standfords bithacks + cnt = 32; + val &= -signed(val); + if (val) cnt--; + if (val & 0x0000FFFF) cnt -= 16; + if (val & 0x00FF00FF) cnt -= 8; + if (val & 0x0F0F0F0F) cnt -= 4; + if (val & 0x33333333) cnt -= 2; + if (val & 0x55555555) cnt -= 1; +#endif + +WRITE_RD(cnt); diff --git a/riscv/insns/p_fl1.h b/riscv/insns/p_fl1.h new file mode 100644 index 0000000000..37ee52b37c --- /dev/null +++ b/riscv/insns/p_fl1.h @@ -0,0 +1,32 @@ +reg_t val = zext_xlen(RS1);; +reg_t cnt; + +#ifdef __GNUC__ + if(val) { + // "Returns the number of leading 0-bits in x" x=0 -> undef + cnt = 31 - __builtin_clz(val); + } else { + // rs1 = 0 -> rd = 0, not 32 + cnt = 32; + } +#else + if(val){ + // log2() from standfords bithacks (find highest '1') + const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + const unsigned int S[] = {1, 2, 4, 8, 16}; + cnt = 0; + for (int i = 4; i >= 0; i--) + { + if (val & b[i]) + { + val >>= S[i]; + cnt |= S[i]; + } + } + } else { + // rs1 = 0 -> rd = 0, not 32 + cnt = 32; + } +#endif + +WRITE_RD(cnt); diff --git a/riscv/insns/p_insert.h b/riscv/insns/p_insert.h new file mode 100644 index 0000000000..7989718553 --- /dev/null +++ b/riscv/insns/p_insert.h @@ -0,0 +1,19 @@ +sreg_t res = RD; +sreg_t val = zext_xlen(RS1); +reg_t first = insn.p_zimm5(); +reg_t upto = insn.p_Luimm5(); + +int offset = first+upto-31; +if(offset < 0) { + offset = 0; +} + +reg_t bit_mask = ( (((reg_t)1<> norm; + +WRITE_RD(sext_xlen(acc)); \ No newline at end of file diff --git a/riscv/insns/p_machhsRN.h b/riscv/insns/p_machhsRN.h new file mode 100644 index 0000000000..7cd965b45b --- /dev/null +++ b/riscv/insns/p_machhsRN.h @@ -0,0 +1,9 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = 
((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_machhuN.h b/riscv/insns/p_machhuN.h new file mode 100644 index 0000000000..aa1d44d93b --- /dev/null +++ b/riscv/insns/p_machhuN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_machhuRN.h b/riscv/insns/p_machhuRN.h new file mode 100644 index 0000000000..3fb0027056 --- /dev/null +++ b/riscv/insns/p_machhuRN.h @@ -0,0 +1,11 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_macsN.h b/riscv/insns/p_macsN.h new file mode 100644 index 0000000000..e32a4abbe2 --- /dev/null +++ b/riscv/insns/p_macsN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_macsRN.h b/riscv/insns/p_macsRN.h new file mode 100644 index 0000000000..e5dbe68afa --- /dev/null +++ b/riscv/insns/p_macsRN.h @@ -0,0 +1,9 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +sreg_t acc = sext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/p_macuN.h b/riscv/insns/p_macuN.h new file mode 100644 index 0000000000..60570d7c07 --- /dev/null +++ b/riscv/insns/p_macuN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); + +acc = ((mul1 * mul2) + acc) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_macuRN.h b/riscv/insns/p_macuRN.h new file mode 100644 index 0000000000..ed360e8ebd --- /dev/null +++ b/riscv/insns/p_macuRN.h @@ -0,0 +1,11 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +reg_t acc = zext_xlen(P_RS3); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +acc = ((mul1 * mul2) + acc + halfbit) >> norm; + +WRITE_RD(sext_xlen(acc)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. 
+ // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_max.h b/riscv/insns/p_max.h new file mode 100644 index 0000000000..7a7ddcd8e9 --- /dev/null +++ b/riscv/insns/p_max.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) > sreg_t(RS2)) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_maxu.h b/riscv/insns/p_maxu.h new file mode 100644 index 0000000000..8cba177c7b --- /dev/null +++ b/riscv/insns/p_maxu.h @@ -0,0 +1,4 @@ +if(RS1 > RS2) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_min.h b/riscv/insns/p_min.h new file mode 100644 index 0000000000..1fc66807ed --- /dev/null +++ b/riscv/insns/p_min.h @@ -0,0 +1,4 @@ +if(sreg_t(RS1) <= sreg_t(RS2)) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_minu.h b/riscv/insns/p_minu.h new file mode 100644 index 0000000000..42339df5e6 --- /dev/null +++ b/riscv/insns/p_minu.h @@ -0,0 +1,4 @@ +if(RS1 <= RS2) + WRITE_RD(RS1); +else + WRITE_RD(RS2); diff --git a/riscv/insns/p_msu.h b/riscv/insns/p_msu.h new file mode 100644 index 0000000000..2a42cf05e0 --- /dev/null +++ b/riscv/insns/p_msu.h @@ -0,0 +1 @@ +WRITE_RD(sext_xlen(sreg_t(RD) - sext_xlen(sreg_t(RS1) * sreg_t(RS2)))); diff --git a/riscv/insns/p_mulhhsN.h b/riscv/insns/p_mulhhsN.h new file mode 100644 index 0000000000..e7cb59b6d1 --- /dev/null +++ b/riscv/insns/p_mulhhsN.h @@ -0,0 +1,7 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +int norm = insn.p_Luimm5(); + +sreg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); \ No newline at end of file diff --git a/riscv/insns/p_mulhhsRN.h b/riscv/insns/p_mulhhsRN.h new file mode 100644 index 0000000000..a5ca521559 --- /dev/null +++ b/riscv/insns/p_mulhhsRN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(1)); +sreg_t mul2 = sext16(RS2_H(1)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); 
diff --git a/riscv/insns/p_mulhhuN.h b/riscv/insns/p_mulhhuN.h new file mode 100644 index 0000000000..5c1a4b38fa --- /dev/null +++ b/riscv/insns/p_mulhhuN.h @@ -0,0 +1,9 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +int norm = insn.p_Luimm5(); + +reg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_mulhhuRN.h b/riscv/insns/p_mulhhuRN.h new file mode 100644 index 0000000000..c51bfbcf0e --- /dev/null +++ b/riscv/insns/p_mulhhuRN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(1)); +reg_t mul2 = zext16(RS2_H(1)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_mulsN.h b/riscv/insns/p_mulsN.h new file mode 100644 index 0000000000..f1cbb96194 --- /dev/null +++ b/riscv/insns/p_mulsN.h @@ -0,0 +1,7 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +int norm = insn.p_Luimm5(); + +sreg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_mulsRN.h b/riscv/insns/p_mulsRN.h new file mode 100644 index 0000000000..d7dfccdd05 --- /dev/null +++ b/riscv/insns/p_mulsRN.h @@ -0,0 +1,8 @@ +sreg_t mul1 = sext16(RS1_H(0)); +sreg_t mul2 = sext16(RS2_H(0)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_muluN.h b/riscv/insns/p_muluN.h new file mode 100644 index 0000000000..539610f750 --- /dev/null +++ b/riscv/insns/p_muluN.h @@ -0,0 +1,9 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +int norm = insn.p_Luimm5(); + +reg_t res = (mul1 * mul2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_muluRN.h b/riscv/insns/p_muluRN.h new file mode 100644 index 0000000000..9fd8178615 --- /dev/null +++ b/riscv/insns/p_muluRN.h @@ -0,0 +1,10 @@ +reg_t mul1 = zext16(RS1_H(0)); +reg_t mul2 = zext16(RS2_H(0)); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((mul1 * mul2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_ror.h b/riscv/insns/p_ror.h new file mode 100644 index 0000000000..124c3aa3b0 --- /dev/null +++ b/riscv/insns/p_ror.h @@ -0,0 +1,6 @@ +reg_t val = zext_xlen(RS1); +reg_t rot = zext_xlen(RS2)%32; + +reg_t res = (val >> rot) | (((val << 32) >> rot)); + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_sb_irpost.h b/riscv/insns/p_sb_irpost.h new file mode 100644 index 0000000000..9339bc9cac --- /dev/null +++ b/riscv/insns/p_sb_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sb_rr.h b/riscv/insns/p_sb_rr.h new file mode 100644 index 0000000000..73e49727c4 --- /dev/null +++ b/riscv/insns/p_sb_rr.h @@ -0,0 +1 @@ +MMU.store_uint8(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sb_rrpost.h b/riscv/insns/p_sb_rrpost.h new file mode 100644 index 0000000000..0442551744 --- /dev/null +++ b/riscv/insns/p_sb_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint8(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/p_sh_irpost.h b/riscv/insns/p_sh_irpost.h new file mode 100644 index 0000000000..f915c518dc --- /dev/null +++ b/riscv/insns/p_sh_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sh_rr.h b/riscv/insns/p_sh_rr.h new file mode 100644 index 0000000000..f3270bd561 --- /dev/null +++ b/riscv/insns/p_sh_rr.h @@ -0,0 +1 @@ +MMU.store_uint16(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sh_rrpost.h b/riscv/insns/p_sh_rrpost.h new file mode 100644 index 0000000000..5043c62876 --- /dev/null +++ b/riscv/insns/p_sh_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint16(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/p_slet.h b/riscv/insns/p_slet.h new file mode 100644 index 0000000000..82f7cc2db1 --- /dev/null +++ b/riscv/insns/p_slet.h @@ -0,0 +1 @@ +WRITE_RD(sreg_t(RS1) <= sreg_t(RS2)); diff --git a/riscv/insns/p_sletu.h b/riscv/insns/p_sletu.h new file mode 100644 
index 0000000000..12547170bb --- /dev/null +++ b/riscv/insns/p_sletu.h @@ -0,0 +1 @@ +WRITE_RD(RS1 <= RS2); diff --git a/riscv/insns/p_subN.h b/riscv/insns/p_subN.h new file mode 100644 index 0000000000..dec2151c3a --- /dev/null +++ b/riscv/insns/p_subN.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); + +sreg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subNr.h b/riscv/insns/p_subNr.h new file mode 100644 index 0000000000..e1db45dfcc --- /dev/null +++ b/riscv/insns/p_subNr.h @@ -0,0 +1,7 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +sreg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subRN.h b/riscv/insns/p_subRN.h new file mode 100644 index 0000000000..d30ddb9ac3 --- /dev/null +++ b/riscv/insns/p_subRN.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RS1); +sreg_t term2 = sext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subRNr.h b/riscv/insns/p_subRNr.h new file mode 100644 index 0000000000..be5d175f47 --- /dev/null +++ b/riscv/insns/p_subRNr.h @@ -0,0 +1,8 @@ +sreg_t term1 = sext_xlen(RD); +sreg_t term2 = sext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +sreg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/p_subuN.h b/riscv/insns/p_subuN.h new file mode 100644 index 0000000000..3363cfe583 --- /dev/null +++ b/riscv/insns/p_subuN.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); + +reg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register 
representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuNr.h b/riscv/insns/p_subuNr.h new file mode 100644 index 0000000000..074ed6c82b --- /dev/null +++ b/riscv/insns/p_subuNr.h @@ -0,0 +1,9 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] + +reg_t res = (term1 - term2) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuRN.h b/riscv/insns/p_subuRN.h new file mode 100644 index 0000000000..656c483886 --- /dev/null +++ b/riscv/insns/p_subuRN.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RS1); +reg_t term2 = zext_xlen(RS2); +int norm = insn.p_Luimm5(); +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. slli) \ No newline at end of file diff --git a/riscv/insns/p_subuRNr.h b/riscv/insns/p_subuRNr.h new file mode 100644 index 0000000000..da421a4e08 --- /dev/null +++ b/riscv/insns/p_subuRNr.h @@ -0,0 +1,10 @@ +reg_t term1 = zext_xlen(RD); +reg_t term2 = zext_xlen(RS1); +int norm = RS2%0x20; // rs[4:0] +uint halfbit = ((uint)0x01 << (norm))/2; // for rounding + +reg_t res = ((term1 - term2) + halfbit) >> norm; + +WRITE_RD(sext_xlen(res)); // this is not a mistake, it needs to be signed here because + // Spike works with 64bit register representations internally. + // And it expects the results to be signed (see eg. 
slli) \ No newline at end of file diff --git a/riscv/insns/p_sw_irpost.h b/riscv/insns/p_sw_irpost.h new file mode 100644 index 0000000000..7ff0406fea --- /dev/null +++ b/riscv/insns/p_sw_irpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + insn.s_imm()); diff --git a/riscv/insns/p_sw_rr.h b/riscv/insns/p_sw_rr.h new file mode 100644 index 0000000000..6bef97f73d --- /dev/null +++ b/riscv/insns/p_sw_rr.h @@ -0,0 +1 @@ +MMU.store_uint32(RS1 + sreg_t(P_RS3), RS2); diff --git a/riscv/insns/p_sw_rrpost.h b/riscv/insns/p_sw_rrpost.h new file mode 100644 index 0000000000..6382d6d801 --- /dev/null +++ b/riscv/insns/p_sw_rrpost.h @@ -0,0 +1,2 @@ +MMU.store_uint32(RS1, RS2); +WRITE_RS1(RS1 + sreg_t(P_RS3)); diff --git a/riscv/insns/pv_abs_b.h b/riscv/insns/pv_abs_b.h new file mode 100644 index 0000000000..c0bc089cc1 --- /dev/null +++ b/riscv/insns/pv_abs_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > 0 ? RS1_B(i) : -sext8(RS1_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_abs_h.h b/riscv/insns/pv_abs_h.h new file mode 100644 index 0000000000..42ca4ff3ca --- /dev/null +++ b/riscv/insns/pv_abs_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > 0 ? 
RS1_H(i) : -sext16(RS1_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_b.h b/riscv/insns/pv_add_b.h new file mode 100644 index 0000000000..ecae63a049 --- /dev/null +++ b/riscv/insns/pv_add_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_h.h b/riscv/insns/pv_add_h.h new file mode 100644 index 0000000000..0a78665af0 --- /dev/null +++ b/riscv/insns/pv_add_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_h_div2.h b/riscv/insns/pv_add_h_div2.h new file mode 100644 index 0000000000..d09a9e41f5 --- /dev/null +++ b/riscv/insns/pv_add_h_div2.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 1 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 1; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_add_h_div4.h b/riscv/insns/pv_add_h_div4.h new file mode 100644 index 0000000000..0dc157025c --- /dev/null +++ b/riscv/insns/pv_add_h_div4.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 2 doesn't work as shift + // will be performed in 
int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 2; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_add_h_div8.h b/riscv/insns/pv_add_h_div8.h new file mode 100644 index 0000000000..5abd486118 --- /dev/null +++ b/riscv/insns/pv_add_h_div8.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] + src2.h[i]) >> 3 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] + src2.h[i]; + res.h[i] = temp >> 3; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_add_sc_b.h b/riscv/insns/pv_add_sc_b.h new file mode 100644 index 0000000000..572b61c07f --- /dev/null +++ b/riscv/insns/pv_add_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sc_h.h b/riscv/insns/pv_add_sc_h.h new file mode 100644 index 0000000000..734a911b8f --- /dev/null +++ b/riscv/insns/pv_add_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sci_b.h b/riscv/insns/pv_add_sci_b.h new file mode 100644 index 0000000000..df47f1cb50 --- /dev/null +++ b/riscv/insns/pv_add_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) + insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_add_sci_h.h b/riscv/insns/pv_add_sci_h.h 
new file mode 100644 index 0000000000..907621c09f --- /dev/null +++ b/riscv/insns/pv_add_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) + insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_b.h b/riscv/insns/pv_and_b.h new file mode 100644 index 0000000000..d3711b762d --- /dev/null +++ b/riscv/insns/pv_and_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_h.h b/riscv/insns/pv_and_h.h new file mode 100644 index 0000000000..8bae35685b --- /dev/null +++ b/riscv/insns/pv_and_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sc_b.h b/riscv/insns/pv_and_sc_b.h new file mode 100644 index 0000000000..b1e6c865e5 --- /dev/null +++ b/riscv/insns/pv_and_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sc_h.h b/riscv/insns/pv_and_sc_h.h new file mode 100644 index 0000000000..2389d11e1b --- /dev/null +++ b/riscv/insns/pv_and_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sci_b.h b/riscv/insns/pv_and_sci_b.h new file mode 100644 index 0000000000..7e4e9e0ac5 --- /dev/null +++ 
b/riscv/insns/pv_and_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) & insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_and_sci_h.h b/riscv/insns/pv_and_sci_h.h new file mode 100644 index 0000000000..fbd57d1163 --- /dev/null +++ b/riscv/insns/pv_and_sci_h.h @@ -0,0 +1,10 @@ +// Halfword lanes: AND each 16-bit element of RS1 with insn.p_simm6() and repack, highest lane first. +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) & insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_b.h b/riscv/insns/pv_avg_b.h new file mode 100644 index 0000000000..3d5d6d4723 --- /dev/null +++ b/riscv/insns/pv_avg_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_h.h b/riscv/insns/pv_avg_h.h new file mode 100644 index 0000000000..725f2f2e09 --- /dev/null +++ b/riscv/insns/pv_avg_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sc_b.h b/riscv/insns/pv_avg_sc_b.h new file mode 100644 index 0000000000..0b7d2f8d2c --- /dev/null +++ b/riscv/insns/pv_avg_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + sext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sc_h.h b/riscv/insns/pv_avg_sc_h.h new file mode 100644 index 0000000000..8a6cb5e504 --- /dev/null +++
b/riscv/insns/pv_avg_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + sext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sci_b.h b/riscv/insns/pv_avg_sci_b.h new file mode 100644 index 0000000000..ff67065e34 --- /dev/null +++ b/riscv/insns/pv_avg_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(sext8(RS1_B(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avg_sci_h.h b/riscv/insns/pv_avg_sci_h.h new file mode 100644 index 0000000000..f7deefd25b --- /dev/null +++ b/riscv/insns/pv_avg_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(sext16(RS1_H(i)) + insn.p_simm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_b.h b/riscv/insns/pv_avgu_b.h new file mode 100644 index 0000000000..435c4d22cc --- /dev/null +++ b/riscv/insns/pv_avgu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(i))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_h.h b/riscv/insns/pv_avgu_h.h new file mode 100644 index 0000000000..3fdbaf4ddb --- /dev/null +++ b/riscv/insns/pv_avgu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(i))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sc_b.h b/riscv/insns/pv_avgu_sc_b.h new file mode 100644 
index 0000000000..47ca3888bc --- /dev/null +++ b/riscv/insns/pv_avgu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + zext8(RS2_B(0))) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sc_h.h b/riscv/insns/pv_avgu_sc_h.h new file mode 100644 index 0000000000..0bf92f93b3 --- /dev/null +++ b/riscv/insns/pv_avgu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + zext16(RS2_H(0))) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sci_b.h b/riscv/insns/pv_avgu_sci_b.h new file mode 100644 index 0000000000..fbc0dff921 --- /dev/null +++ b/riscv/insns/pv_avgu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(zext8(RS1_B(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_avgu_sci_h.h b/riscv/insns/pv_avgu_sci_h.h new file mode 100644 index 0000000000..dd8cd35442 --- /dev/null +++ b/riscv/insns/pv_avgu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(zext16(RS1_H(i)) + insn.p_zimm6()) >> 1; + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_cmpeq_b.h b/riscv/insns/pv_cmpeq_b.h new file mode 100644 index 0000000000..b2e912cb1f --- /dev/null +++ b/riscv/insns/pv_cmpeq_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]); +} + 
+WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_h.h b/riscv/insns/pv_cmpeq_h.h new file mode 100644 index 0000000000..6524f55b54 --- /dev/null +++ b/riscv/insns/pv_cmpeq_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpeq_sc_b.h b/riscv/insns/pv_cmpeq_sc_b.h new file mode 100644 index 0000000000..6103f22750 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sc_h.h b/riscv/insns/pv_cmpeq_sc_h.h new file mode 100644 index 0000000000..82eecef2b9 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sci_b.h b/riscv/insns/pv_cmpeq_sci_b.h new file mode 100644 index 0000000000..8ec09d3702 --- /dev/null +++ b/riscv/insns/pv_cmpeq_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand 
+union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] == src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpeq_sci_h.h b/riscv/insns/pv_cmpeq_sci_h.h new file mode 100644 index 0000000000..1079f724bc --- /dev/null +++ b/riscv/insns/pv_cmpeq_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] == src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_b.h b/riscv/insns/pv_cmpge_b.h new file mode 100644 index 0000000000..fe8a2f3871 --- /dev/null +++ b/riscv/insns/pv_cmpge_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_h.h b/riscv/insns/pv_cmpge_h.h new file mode 100644 index 0000000000..f6d8883637 --- /dev/null +++ b/riscv/insns/pv_cmpge_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpge_sc_b.h b/riscv/insns/pv_cmpge_sc_b.h new file mode 100644 index 0000000000..5471af6961 --- /dev/null +++ b/riscv/insns/pv_cmpge_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd 
lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sc_h.h b/riscv/insns/pv_cmpge_sc_h.h new file mode 100644 index 0000000000..91a7f412ab --- /dev/null +++ b/riscv/insns/pv_cmpge_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sci_b.h b/riscv/insns/pv_cmpge_sci_b.h new file mode 100644 index 0000000000..1f6fa2d92b --- /dev/null +++ b/riscv/insns/pv_cmpge_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] >= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpge_sci_h.h b/riscv/insns/pv_cmpge_sci_h.h new file mode 100644 index 0000000000..6a1ce3db51 --- /dev/null +++ b/riscv/insns/pv_cmpge_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] >= 
src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_b.h b/riscv/insns/pv_cmpgeu_b.h new file mode 100644 index 0000000000..e587e93361 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_h.h b/riscv/insns/pv_cmpgeu_h.h new file mode 100644 index 0000000000..ab67fff8c0 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgeu_sc_b.h b/riscv/insns/pv_cmpgeu_sc_b.h new file mode 100644 index 0000000000..a364f6c903 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sc_h.h b/riscv/insns/pv_cmpgeu_sc_h.h new file mode 100644 index 0000000000..f8898f4fab --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 
0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sci_b.h b/riscv/insns/pv_cmpgeu_sci_b.h new file mode 100644 index 0000000000..e4840524f0 --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] >= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgeu_sci_h.h b/riscv/insns/pv_cmpgeu_sci_h.h new file mode 100644 index 0000000000..09eb4dd8ab --- /dev/null +++ b/riscv/insns/pv_cmpgeu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] >= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_b.h b/riscv/insns/pv_cmpgt_b.h new file mode 100644 index 0000000000..c2c4ba30e9 --- /dev/null +++ b/riscv/insns/pv_cmpgt_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_h.h b/riscv/insns/pv_cmpgt_h.h new file mode 100644 index 0000000000..338b923ece --- /dev/null +++ b/riscv/insns/pv_cmpgt_h.h @@ 
-0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgt_sc_b.h b/riscv/insns/pv_cmpgt_sc_b.h new file mode 100644 index 0000000000..3dd660c2be --- /dev/null +++ b/riscv/insns/pv_cmpgt_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_sc_h.h b/riscv/insns/pv_cmpgt_sc_h.h new file mode 100644 index 0000000000..77258f332e --- /dev/null +++ b/riscv/insns/pv_cmpgt_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgt_sci_b.h b/riscv/insns/pv_cmpgt_sci_b.h new file mode 100644 index 0000000000..597f1c7af0 --- /dev/null +++ b/riscv/insns/pv_cmpgt_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] > src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at 
end of file diff --git a/riscv/insns/pv_cmpgt_sci_h.h b/riscv/insns/pv_cmpgt_sci_h.h new file mode 100644 index 0000000000..5281a3c409 --- /dev/null +++ b/riscv/insns/pv_cmpgt_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] > src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_b.h b/riscv/insns/pv_cmpgtu_b.h new file mode 100644 index 0000000000..49a34a6d71 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_h.h b/riscv/insns/pv_cmpgtu_h.h new file mode 100644 index 0000000000..a83073fa8c --- /dev/null +++ b/riscv/insns/pv_cmpgtu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpgtu_sc_b.h b/riscv/insns/pv_cmpgtu_sc_b.h new file mode 100644 index 0000000000..641d11040b --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; 
i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sc_h.h b/riscv/insns/pv_cmpgtu_sc_h.h new file mode 100644 index 0000000000..290d0a9795 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sci_b.h b/riscv/insns/pv_cmpgtu_sci_b.h new file mode 100644 index 0000000000..acf42fa6b7 --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] > src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpgtu_sci_h.h b/riscv/insns/pv_cmpgtu_sci_h.h new file mode 100644 index 0000000000..7947ab2dea --- /dev/null +++ b/riscv/insns/pv_cmpgtu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] > src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_b.h b/riscv/insns/pv_cmple_b.h new file mode 100644 index 0000000000..cef9591bfe --- /dev/null +++ 
b/riscv/insns/pv_cmple_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_h.h b/riscv/insns/pv_cmple_h.h new file mode 100644 index 0000000000..f10b555558 --- /dev/null +++ b/riscv/insns/pv_cmple_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmple_sc_b.h b/riscv/insns/pv_cmple_sc_b.h new file mode 100644 index 0000000000..3975bc9047 --- /dev/null +++ b/riscv/insns/pv_cmple_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sc_h.h b/riscv/insns/pv_cmple_sc_h.h new file mode 100644 index 0000000000..f4612313dc --- /dev/null +++ b/riscv/insns/pv_cmple_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sci_b.h 
b/riscv/insns/pv_cmple_sci_b.h new file mode 100644 index 0000000000..54a5c1f830 --- /dev/null +++ b/riscv/insns/pv_cmple_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] <= src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmple_sci_h.h b/riscv/insns/pv_cmple_sci_h.h new file mode 100644 index 0000000000..158616e96f --- /dev/null +++ b/riscv/insns/pv_cmple_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] <= src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_b.h b/riscv/insns/pv_cmpleu_b.h new file mode 100644 index 0000000000..038c4ff7ab --- /dev/null +++ b/riscv/insns/pv_cmpleu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_h.h b/riscv/insns/pv_cmpleu_h.h new file mode 100644 index 0000000000..7f0577b409 --- /dev/null +++ b/riscv/insns/pv_cmpleu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + 
res.h[i] = (src1.hu[i] <= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpleu_sc_b.h b/riscv/insns/pv_cmpleu_sc_b.h new file mode 100644 index 0000000000..92232bf865 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sc_h.h b/riscv/insns/pv_cmpleu_sc_h.h new file mode 100644 index 0000000000..19e8a957a0 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] <= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sci_b.h b/riscv/insns/pv_cmpleu_sci_b.h new file mode 100644 index 0000000000..d400010f6f --- /dev/null +++ b/riscv/insns/pv_cmpleu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] <= src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpleu_sci_h.h b/riscv/insns/pv_cmpleu_sci_h.h new file mode 100644 index 0000000000..dbd2ca7940 --- /dev/null +++ b/riscv/insns/pv_cmpleu_sci_h.h @@ -0,0 +1,13 @@ 
+// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] <= src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_b.h b/riscv/insns/pv_cmplt_b.h new file mode 100644 index 0000000000..5e1dda25ee --- /dev/null +++ b/riscv/insns/pv_cmplt_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_h.h b/riscv/insns/pv_cmplt_h.h new file mode 100644 index 0000000000..d6cad0c119 --- /dev/null +++ b/riscv/insns/pv_cmplt_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmplt_sc_b.h b/riscv/insns/pv_cmplt_sc_b.h new file mode 100644 index 0000000000..d6d7d6823f --- /dev/null +++ b/riscv/insns/pv_cmplt_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sc_h.h b/riscv/insns/pv_cmplt_sc_h.h new file mode 100644 index 
0000000000..6d0b77f8eb --- /dev/null +++ b/riscv/insns/pv_cmplt_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sci_b.h b/riscv/insns/pv_cmplt_sci_b.h new file mode 100644 index 0000000000..1382c7b43b --- /dev/null +++ b/riscv/insns/pv_cmplt_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] < src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmplt_sci_h.h b/riscv/insns/pv_cmplt_sci_h.h new file mode 100644 index 0000000000..43a060c523 --- /dev/null +++ b/riscv/insns/pv_cmplt_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] < src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_b.h b/riscv/insns/pv_cmpltu_b.h new file mode 100644 index 0000000000..fae2d4265f --- /dev/null +++ b/riscv/insns/pv_cmpltu_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; 
i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_h.h b/riscv/insns/pv_cmpltu_h.h new file mode 100644 index 0000000000..932f2747c0 --- /dev/null +++ b/riscv/insns/pv_cmpltu_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpltu_sc_b.h b/riscv/insns/pv_cmpltu_sc_b.h new file mode 100644 index 0000000000..b29e612da8 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sc_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sc_h.h b/riscv/insns/pv_cmpltu_sc_h.h new file mode 100644 index 0000000000..0c3ee69195 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sci_b.h b/riscv/insns/pv_cmpltu_sci_b.h new file mode 100644 index 0000000000..67ff64c89e --- /dev/null +++ b/riscv/insns/pv_cmpltu_sci_b.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union 
simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.bu[i] < src2.bu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpltu_sci_h.h b/riscv/insns/pv_cmpltu_sci_h.h new file mode 100644 index 0000000000..edff0c5f52 --- /dev/null +++ b/riscv/insns/pv_cmpltu_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((insn.p_zimm6())*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.hu[i] < src2.hu[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_b.h b/riscv/insns/pv_cmpne_b.h new file mode 100644 index 0000000000..4f8763897a --- /dev/null +++ b/riscv/insns/pv_cmpne_b.h @@ -0,0 +1,12 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_h.h b/riscv/insns/pv_cmpne_h.h new file mode 100644 index 0000000000..8f74a35562 --- /dev/null +++ b/riscv/insns/pv_cmpne_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_cmpne_sc_b.h b/riscv/insns/pv_cmpne_sc_b.h new file mode 100644 index 0000000000..c94538bbe5 --- /dev/null +++ b/riscv/insns/pv_cmpne_sc_b.h @@ 
-0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sc_h.h b/riscv/insns/pv_cmpne_sc_h.h new file mode 100644 index 0000000000..f79334cca5 --- /dev/null +++ b/riscv/insns/pv_cmpne_sc_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.reg = ((RS2 & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sci_b.h b/riscv/insns/pv_cmpne_sci_b.h new file mode 100644 index 0000000000..1930903b60 --- /dev/null +++ b/riscv/insns/pv_cmpne_sci_b.h @@ -0,0 +1,11 @@ +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FF)*0x0101010101010101)}; +union simd_reg res; + +for(int i=0; i<(64/e8); i++) +{ + res.b[i] = (src1.b[i] != src2.b[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cmpne_sci_h.h b/riscv/insns/pv_cmpne_sci_h.h new file mode 100644 index 0000000000..1a2b03671b --- /dev/null +++ b/riscv/insns/pv_cmpne_sci_h.h @@ -0,0 +1,13 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +// replicate lowest element in second operand +union simd_reg src2 = {.sreg = ((insn.p_simm6() & 0x0FFFF)*0x0001000100010001)}; +union simd_reg res; + +for(int i=0; i<(64/e16); i++) +{ + res.h[i] = (src1.h[i] != 
src2.h[i]); +} + +WRITE_RD(sext_xlen(res.sreg)); \ No newline at end of file diff --git a/riscv/insns/pv_cplxconj_h.h b/riscv/insns/pv_cplxconj_h.h new file mode 100644 index 0000000000..95c91e5bd5 --- /dev/null +++ b/riscv/insns/pv_cplxconj_h.h @@ -0,0 +1,11 @@ +reg_t src1 = RS1; +reg_t res; + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = src1; +int16_t imag = (src1 >> 16); + +imag = -imag; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i.h b/riscv/insns/pv_cplxmul_h_i.h new file mode 100644 index 0000000000..e899c49e78 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 15; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div2.h b/riscv/insns/pv_cplxmul_h_i_div2.h new file mode 100644 index 0000000000..9d477176cd --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 16; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div4.h b/riscv/insns/pv_cplxmul_h_i_div4.h new file mode 100644 index 0000000000..f23395b070 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = 
(src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 17; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_i_div8.h b/riscv/insns/pv_cplxmul_h_i_div8.h new file mode 100644 index 0000000000..735bf9be2d --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_i_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real3 = src3; + +int16_t imag = ((sreg_t)real1*imag2 + imag1*real2) >> 18; +res = (imag << 16) | real3; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r.h b/riscv/insns/pv_cplxmul_h_r.h new file mode 100644 index 0000000000..b101bfe153 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 15; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r_div2.h b/riscv/insns/pv_cplxmul_h_r_div2.h new file mode 100644 index 0000000000..3b72aed003 --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 16; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git 
a/riscv/insns/pv_cplxmul_h_r_div4.h b/riscv/insns/pv_cplxmul_h_r_div4.h new file mode 100644 index 0000000000..d5091837ed --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 17; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_cplxmul_h_r_div8.h b/riscv/insns/pv_cplxmul_h_r_div8.h new file mode 100644 index 0000000000..0b503ad84a --- /dev/null +++ b/riscv/insns/pv_cplxmul_h_r_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t src3 = P_RS3; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); +int16_t imag3 = (src3 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = ((sreg_t)real1*real2 - imag1*imag2) >> 18; +res = (imag3 << 16) | real; + +WRITE_RD(sext_xlen(res)); diff --git a/riscv/insns/pv_dotsp_b.h b/riscv/insns/pv_dotsp_b.h new file mode 100644 index 0000000000..93b7233cca --- /dev/null +++ b/riscv/insns/pv_dotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_h.h b/riscv/insns/pv_dotsp_h.h new file mode 100644 index 0000000000..9feed35efd --- /dev/null +++ b/riscv/insns/pv_dotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sc_b.h b/riscv/insns/pv_dotsp_sc_b.h new file mode 100644 index 0000000000..cef11d5e77 --- /dev/null +++ b/riscv/insns/pv_dotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + 
+for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sc_h.h b/riscv/insns/pv_dotsp_sc_h.h new file mode 100644 index 0000000000..ef558d39f0 --- /dev/null +++ b/riscv/insns/pv_dotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sci_b.h b/riscv/insns/pv_dotsp_sci_b.h new file mode 100644 index 0000000000..3470fd55a3 --- /dev/null +++ b/riscv/insns/pv_dotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotsp_sci_h.h b/riscv/insns/pv_dotsp_sci_h.h new file mode 100644 index 0000000000..97e30eb290 --- /dev/null +++ b/riscv/insns/pv_dotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_b.h b/riscv/insns/pv_dotup_b.h new file mode 100644 index 0000000000..fa77f36678 --- /dev/null +++ b/riscv/insns/pv_dotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_h.h b/riscv/insns/pv_dotup_h.h new file mode 100644 index 0000000000..4e170b238b --- /dev/null +++ b/riscv/insns/pv_dotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sc_b.h b/riscv/insns/pv_dotup_sc_b.h new file mode 100644 index 0000000000..a581d0162a --- /dev/null +++ b/riscv/insns/pv_dotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + 
+WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sc_h.h b/riscv/insns/pv_dotup_sc_h.h new file mode 100644 index 0000000000..b78762a871 --- /dev/null +++ b/riscv/insns/pv_dotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sci_b.h b/riscv/insns/pv_dotup_sci_b.h new file mode 100644 index 0000000000..0dedb1caf4 --- /dev/null +++ b/riscv/insns/pv_dotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotup_sci_h.h b/riscv/insns/pv_dotup_sci_h.h new file mode 100644 index 0000000000..64a36d5690 --- /dev/null +++ b/riscv/insns/pv_dotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_b.h b/riscv/insns/pv_dotusp_b.h new file mode 100644 index 0000000000..1cdfc2f2c2 --- /dev/null +++ b/riscv/insns/pv_dotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_h.h b/riscv/insns/pv_dotusp_h.h new file mode 100644 index 0000000000..81968a14d6 --- /dev/null +++ b/riscv/insns/pv_dotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sc_b.h b/riscv/insns/pv_dotusp_sc_b.h new file mode 100644 index 0000000000..d562a7d4d3 --- /dev/null +++ b/riscv/insns/pv_dotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sc_h.h 
b/riscv/insns/pv_dotusp_sc_h.h new file mode 100644 index 0000000000..3815c3721f --- /dev/null +++ b/riscv/insns/pv_dotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sci_b.h b/riscv/insns/pv_dotusp_sci_b.h new file mode 100644 index 0000000000..92c229540e --- /dev/null +++ b/riscv/insns/pv_dotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_dotusp_sci_h.h b/riscv/insns/pv_dotusp_sci_h.h new file mode 100644 index 0000000000..8f91a89a33 --- /dev/null +++ b/riscv/insns/pv_dotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = 0; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_extract_b.h b/riscv/insns/pv_extract_b.h new file mode 100644 index 0000000000..fce80bbb6e --- /dev/null +++ b/riscv/insns/pv_extract_b.h @@ -0,0 +1 @@ +WRITE_RD(sext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/riscv/insns/pv_extract_h.h b/riscv/insns/pv_extract_h.h new file mode 100644 index 0000000000..ee35393d49 --- /dev/null +++ b/riscv/insns/pv_extract_h.h @@ -0,0 +1 @@ +WRITE_RD(sext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/riscv/insns/pv_extractu_b.h b/riscv/insns/pv_extractu_b.h new file mode 100644 index 0000000000..c240233874 --- /dev/null +++ b/riscv/insns/pv_extractu_b.h @@ -0,0 +1 @@ +WRITE_RD(zext8(RS1_B(insn.p_zimm6() & 0x03))); diff --git a/riscv/insns/pv_extractu_h.h b/riscv/insns/pv_extractu_h.h new file mode 100644 index 0000000000..90b679afd0 --- /dev/null +++ b/riscv/insns/pv_extractu_h.h @@ -0,0 +1 @@ +WRITE_RD(zext16(RS1_H(insn.p_zimm6() & 0x01))); diff --git a/riscv/insns/pv_insert_b.h b/riscv/insns/pv_insert_b.h new file mode 100644 index 0000000000..5575e79671 --- /dev/null 
+++ b/riscv/insns/pv_insert_b.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x03; /* select which rd byte lane (0-3) receives the 8-bit value; the lane stride (xlen >> 2) is 8 bits only when xlen == 32 */ + +ins_rd = (ins_rd & ~(0xFF << ((xlen >> 2) * i))) | ((RS1_H(0) & 0xFF) << ((xlen >> 2) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/riscv/insns/pv_insert_h.h b/riscv/insns/pv_insert_h.h new file mode 100644 index 0000000000..eccb0eda6d --- /dev/null +++ b/riscv/insns/pv_insert_h.h @@ -0,0 +1,6 @@ +uint32_t ins_rd = RD; +uint8_t i = insn.p_zimm6() & 0x01; /* select to which rd half to write the 16-bit value */ + +ins_rd = (ins_rd & ~(0xFFFF << ((xlen >> 1) * i))) | ((RS1_H(0) & 0xFFFF) << ((xlen >> 1) * i)); + +WRITE_RD(sext_xlen(ins_rd)); diff --git a/riscv/insns/pv_max_b.h b/riscv/insns/pv_max_b.h new file mode 100644 index 0000000000..4dc3e6be89 --- /dev/null +++ b/riscv/insns/pv_max_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_h.h b/riscv/insns/pv_max_h.h new file mode 100644 index 0000000000..c65a32da64 --- /dev/null +++ b/riscv/insns/pv_max_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sc_b.h b/riscv/insns/pv_max_sc_b.h new file mode 100644 index 0000000000..896087f623 --- /dev/null +++ b/riscv/insns/pv_max_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > sext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sc_h.h b/riscv/insns/pv_max_sc_h.h new file mode 100644 index 0000000000..fd55fb49b3 --- /dev/null +++ b/riscv/insns/pv_max_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sci_b.h b/riscv/insns/pv_max_sci_b.h new file mode 100644 index 0000000000..5e06669faa --- /dev/null +++ b/riscv/insns/pv_max_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) > insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_max_sci_h.h b/riscv/insns/pv_max_sci_h.h new file mode 100644 index 0000000000..ce1df2ee1d --- /dev/null +++ b/riscv/insns/pv_max_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) > insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_b.h b/riscv/insns/pv_maxu_b.h new file mode 100644 index 0000000000..5821c17266 --- /dev/null +++ b/riscv/insns/pv_maxu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_h.h b/riscv/insns/pv_maxu_h.h new file mode 100644 index 0000000000..3e587c3c99 --- /dev/null +++ b/riscv/insns/pv_maxu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sc_b.h b/riscv/insns/pv_maxu_sc_b.h new file mode 100644 index 0000000000..c297b87ab2 --- /dev/null +++ b/riscv/insns/pv_maxu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sc_h.h b/riscv/insns/pv_maxu_sc_h.h new file mode 100644 index 0000000000..fbb5c7feb9 --- /dev/null +++ b/riscv/insns/pv_maxu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_maxu_sci_b.h b/riscv/insns/pv_maxu_sci_b.h new file mode 100644 index 0000000000..ab5f6e5f98 --- /dev/null +++ b/riscv/insns/pv_maxu_sci_b.h @@ -0,0 +1,10 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) > insn.p_zimm6() ? 
RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); + diff --git a/riscv/insns/pv_maxu_sci_h.h b/riscv/insns/pv_maxu_sci_h.h new file mode 100644 index 0000000000..9aaf9effc6 --- /dev/null +++ b/riscv/insns/pv_maxu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) > insn.p_zimm6() ? RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_b.h b/riscv/insns/pv_min_b.h new file mode 100644 index 0000000000..1b9104b55f --- /dev/null +++ b/riscv/insns/pv_min_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(i)) ? RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_h.h b/riscv/insns/pv_min_h.h new file mode 100644 index 0000000000..bbc83caea8 --- /dev/null +++ b/riscv/insns/pv_min_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sc_b.h b/riscv/insns/pv_min_sc_b.h new file mode 100644 index 0000000000..1d2aac507b --- /dev/null +++ b/riscv/insns/pv_min_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= sext8(RS2_B(0)) ? 
RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sc_h.h b/riscv/insns/pv_min_sc_h.h new file mode 100644 index 0000000000..b2b8ab1109 --- /dev/null +++ b/riscv/insns/pv_min_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= sext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sci_b.h b/riscv/insns/pv_min_sci_b.h new file mode 100644 index 0000000000..031b51f362 --- /dev/null +++ b/riscv/insns/pv_min_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) <= insn.p_simm6() ? RS1_B(i) : insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_min_sci_h.h b/riscv/insns/pv_min_sci_h.h new file mode 100644 index 0000000000..d007e06620 --- /dev/null +++ b/riscv/insns/pv_min_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) <= insn.p_simm6() ? RS1_H(i) : insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_b.h b/riscv/insns/pv_minu_b.h new file mode 100644 index 0000000000..bbb92ca55a --- /dev/null +++ b/riscv/insns/pv_minu_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(i)) ? 
RS1_B(i) : RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_h.h b/riscv/insns/pv_minu_h.h new file mode 100644 index 0000000000..fa7b0a4e21 --- /dev/null +++ b/riscv/insns/pv_minu_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(i)) ? RS1_H(i) : RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sc_b.h b/riscv/insns/pv_minu_sc_b.h new file mode 100644 index 0000000000..566bcce6da --- /dev/null +++ b/riscv/insns/pv_minu_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= zext8(RS2_B(0)) ? RS1_B(i) : RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sc_h.h b/riscv/insns/pv_minu_sc_h.h new file mode 100644 index 0000000000..7471d96780 --- /dev/null +++ b/riscv/insns/pv_minu_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= zext16(RS2_H(0)) ? RS1_H(i) : RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sci_b.h b/riscv/insns/pv_minu_sci_b.h new file mode 100644 index 0000000000..75c43787c3 --- /dev/null +++ b/riscv/insns/pv_minu_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) <= insn.p_zimm6() ? 
RS1_B(i) : insn.p_zimm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_minu_sci_h.h b/riscv/insns/pv_minu_sci_h.h new file mode 100644 index 0000000000..c665e92f40 --- /dev/null +++ b/riscv/insns/pv_minu_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) <= insn.p_zimm6() ? RS1_H(i) : insn.p_zimm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_b.h b/riscv/insns/pv_or_b.h new file mode 100644 index 0000000000..d27a6e5d17 --- /dev/null +++ b/riscv/insns/pv_or_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_h.h b/riscv/insns/pv_or_h.h new file mode 100644 index 0000000000..65b112893e --- /dev/null +++ b/riscv/insns/pv_or_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sc_b.h b/riscv/insns/pv_or_sc_b.h new file mode 100644 index 0000000000..cac508744b --- /dev/null +++ b/riscv/insns/pv_or_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sc_h.h b/riscv/insns/pv_or_sc_h.h new file mode 100644 index 0000000000..e6f567cf3b --- /dev/null +++ b/riscv/insns/pv_or_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | RS2_H(0); + simd_rd <<= 16; + simd_rd += 
(uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sci_b.h b/riscv/insns/pv_or_sci_b.h new file mode 100644 index 0000000000..0cb7b5cb6c --- /dev/null +++ b/riscv/insns/pv_or_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) | insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_or_sci_h.h b/riscv/insns/pv_or_sci_h.h new file mode 100644 index 0000000000..e95922e1eb --- /dev/null +++ b/riscv/insns/pv_or_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) | insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_pack.h b/riscv/insns/pv_pack.h new file mode 100644 index 0000000000..a7c9b69e5b --- /dev/null +++ b/riscv/insns/pv_pack.h @@ -0,0 +1,7 @@ +reg_t src1 = RS1_H(0); +reg_t src2 = RS2_H(0); + +reg_t res = (src1 << 16) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_pack_h.h b/riscv/insns/pv_pack_h.h new file mode 100644 index 0000000000..7ad94faa9d --- /dev/null +++ b/riscv/insns/pv_pack_h.h @@ -0,0 +1,7 @@ +reg_t src1 = RS1_H(1); +reg_t src2 = RS2_H(1); + +reg_t res = (src1 << 16) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_packhi_b.h b/riscv/insns/pv_packhi_b.h new file mode 100644 index 0000000000..b8a7a46c8d --- /dev/null +++ b/riscv/insns/pv_packhi_b.h @@ -0,0 +1,8 @@ +reg_t src1 = RS1_B(0); +reg_t src2 = RS2_B(0); +reg_t res = RD & 0x0FFFF; + +res |= (src1 << 24) | (src2 << 16); + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_packlo_b.h b/riscv/insns/pv_packlo_b.h new file mode 100644 index 0000000000..4606fe1f01 --- /dev/null +++ b/riscv/insns/pv_packlo_b.h @@ -0,0 +1,8 @@ +reg_t src1 = RS1_B(0); +reg_t src2 = RS2_B(0); +reg_t res = RD & 0xFFFF0000; + +res 
|= (src1 << 8) | src2; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_sdotsp_b.h b/riscv/insns/pv_sdotsp_b.h new file mode 100644 index 0000000000..812e3d4369 --- /dev/null +++ b/riscv/insns/pv_sdotsp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_h.h b/riscv/insns/pv_sdotsp_h.h new file mode 100644 index 0000000000..9ccfae939d --- /dev/null +++ b/riscv/insns/pv_sdotsp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sc_b.h b/riscv/insns/pv_sdotsp_sc_b.h new file mode 100644 index 0000000000..e665a669f2 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sc_h.h b/riscv/insns/pv_sdotsp_sc_h.h new file mode 100644 index 0000000000..fa1ca93fee --- /dev/null +++ b/riscv/insns/pv_sdotsp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sci_b.h b/riscv/insns/pv_sdotsp_sci_b.h new file mode 100644 index 0000000000..31aab1fe54 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sext8(RS1_B(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotsp_sci_h.h b/riscv/insns/pv_sdotsp_sci_h.h new file mode 100644 index 0000000000..151d16a2e9 --- /dev/null +++ b/riscv/insns/pv_sdotsp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sext16(RS1_H(i)) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git 
a/riscv/insns/pv_sdotup_b.h b/riscv/insns/pv_sdotup_b.h new file mode 100644 index 0000000000..82e47b4f82 --- /dev/null +++ b/riscv/insns/pv_sdotup_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_h.h b/riscv/insns/pv_sdotup_h.h new file mode 100644 index 0000000000..de77009a07 --- /dev/null +++ b/riscv/insns/pv_sdotup_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sc_b.h b/riscv/insns/pv_sdotup_sc_b.h new file mode 100644 index 0000000000..717fffc119 --- /dev/null +++ b/riscv/insns/pv_sdotup_sc_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * zext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sc_h.h b/riscv/insns/pv_sdotup_sc_h.h new file mode 100644 index 0000000000..ecf0485664 --- /dev/null +++ b/riscv/insns/pv_sdotup_sc_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * zext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sci_b.h b/riscv/insns/pv_sdotup_sci_b.h new file mode 100644 index 0000000000..bd4d850e6d --- /dev/null +++ b/riscv/insns/pv_sdotup_sci_b.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += zext8(RS1_B(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotup_sci_h.h b/riscv/insns/pv_sdotup_sci_h.h new file mode 100644 index 0000000000..145e73717f --- /dev/null +++ b/riscv/insns/pv_sdotup_sci_h.h @@ -0,0 +1,6 @@ +uint32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += zext16(RS1_H(i)) * insn.p_zimm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_b.h b/riscv/insns/pv_sdotusp_b.h new file mode 100644 
index 0000000000..05d268ed21 --- /dev/null +++ b/riscv/insns/pv_sdotusp_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_h.h b/riscv/insns/pv_sdotusp_h.h new file mode 100644 index 0000000000..fdc550db1c --- /dev/null +++ b/riscv/insns/pv_sdotusp_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(i)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sc_b.h b/riscv/insns/pv_sdotusp_sc_b.h new file mode 100644 index 0000000000..2840cd1483 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sc_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * sext8(RS2_B(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sc_h.h b/riscv/insns/pv_sdotusp_sc_h.h new file mode 100644 index 0000000000..ca4c25ac10 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sc_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * sext16(RS2_H(0)); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sci_b.h b/riscv/insns/pv_sdotusp_sci_b.h new file mode 100644 index 0000000000..d6823f83a9 --- /dev/null +++ b/riscv/insns/pv_sdotusp_sci_b.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/8 - 1; i >= 0; i--) + acc += sreg_t(zext8(RS1_B(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_sdotusp_sci_h.h b/riscv/insns/pv_sdotusp_sci_h.h new file mode 100644 index 0000000000..42c4fbe88b --- /dev/null +++ b/riscv/insns/pv_sdotusp_sci_h.h @@ -0,0 +1,6 @@ +int32_t acc = RD; + +for(int i = xlen/16 - 1; i >= 0; i--) + acc += sreg_t(zext16(RS1_H(i))) * insn.p_simm6(); + +WRITE_RD(sext_xlen(acc)); diff --git a/riscv/insns/pv_shuffle2_b.h b/riscv/insns/pv_shuffle2_b.h new file mode 100644 index 
0000000000..8dd4e99945 --- /dev/null +++ b/riscv/insns/pv_shuffle2_b.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [2] of second operand) +uint8_t byte_sel; // select which byte from source (bits [1:0] of second operand) +uint8_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + byte_sel = RS2_B(i) & 0x03; // bits [1:0] of RS2_B(i) + src_sel = (RS2_B(i) >> 2) & 0x01; // bit [2] of RS2_B(i) + source = src_sel ? RS1_B(byte_sel) : RD_B(byte_sel); + simd_rd <<= 8; + simd_rd += (uint32_t)source & 0x000000FF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_shuffle2_h.h b/riscv/insns/pv_shuffle2_h.h new file mode 100644 index 0000000000..362a4bdc77 --- /dev/null +++ b/riscv/insns/pv_shuffle2_h.h @@ -0,0 +1,14 @@ +uint8_t src_sel; // select rd or rs1 as source (bit [1] of second operand) +uint8_t half_sel; // select which half from source (bit [0] of second operand) +uint16_t source; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + half_sel = RS2_H(i) & 0x01; // bit [0] of RS2_H(i) + src_sel = (RS2_H(i) >> 1) & 0x01; // bit [1] of RS2_H(i) + source = src_sel ? 
RS1_H(half_sel) : RD_H(half_sel); + simd_rd <<= 16; + simd_rd += (uint32_t)source & 0x0000FFFF; +} + +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_shuffle_b.h b/riscv/insns/pv_shuffle_b.h new file mode 100644 index 0000000000..efd7790c81 --- /dev/null +++ b/riscv/insns/pv_shuffle_b.h @@ -0,0 +1,16 @@ +reg_t selector = RS2; + +reg_t sel3 = (selector >> 24)%4; +reg_t sel2 = (selector >> 16)%4; +reg_t sel1 = (selector >> 8)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(sel3); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shuffle_h.h b/riscv/insns/pv_shuffle_h.h new file mode 100644 index 0000000000..3920c9f619 --- /dev/null +++ b/riscv/insns/pv_shuffle_h.h @@ -0,0 +1,11 @@ +reg_t selector = RS2; + +reg_t h_sel = (selector >> 16)%2; +reg_t l_sel = selector%2; +reg_t hhalf = RS1_H(h_sel); +reg_t lhalf = RS1_H(l_sel); + +reg_t res = (hhalf << 16) | lhalf; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shuffle_sci_h.h b/riscv/insns/pv_shuffle_sci_h.h new file mode 100644 index 0000000000..2358492738 --- /dev/null +++ b/riscv/insns/pv_shuffle_sci_h.h @@ -0,0 +1,11 @@ +reg_t selector = insn.p_zimm6(); + +reg_t h_sel = (selector >> 1)%2; +reg_t l_sel = selector%2; +reg_t hhalf = RS1_H(h_sel); +reg_t lhalf = RS1_H(l_sel); + +reg_t res = (hhalf << 16) | lhalf; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei0_sci_b.h b/riscv/insns/pv_shufflei0_sci_b.h new file mode 100644 index 0000000000..a1524c8d46 --- /dev/null +++ b/riscv/insns/pv_shufflei0_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(0); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | 
byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei1_sci_b.h b/riscv/insns/pv_shufflei1_sci_b.h new file mode 100644 index 0000000000..5ccb2c7549 --- /dev/null +++ b/riscv/insns/pv_shufflei1_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(1); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei2_sci_b.h b/riscv/insns/pv_shufflei2_sci_b.h new file mode 100644 index 0000000000..a84166929d --- /dev/null +++ b/riscv/insns/pv_shufflei2_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(2); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_shufflei3_sci_b.h b/riscv/insns/pv_shufflei3_sci_b.h new file mode 100644 index 0000000000..89c1d91132 --- /dev/null +++ b/riscv/insns/pv_shufflei3_sci_b.h @@ -0,0 +1,15 @@ +reg_t selector = insn.p_zimm6();; + +reg_t sel2 = (selector >> 4)%4; +reg_t sel1 = (selector >> 2)%4; +reg_t sel0 = selector%4; + +reg_t byte3 = RS1_B(3); +reg_t byte2 = RS1_B(sel2); +reg_t byte1 = RS1_B(sel1); +reg_t byte0 = RS1_B(sel0); + +reg_t res = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_sll_b.h b/riscv/insns/pv_sll_b.h new file mode 100644 index 0000000000..ca8bcd6883 --- /dev/null +++ b/riscv/insns/pv_sll_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += 
(uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_h.h b/riscv/insns/pv_sll_h.h new file mode 100644 index 0000000000..cb9200caca --- /dev/null +++ b/riscv/insns/pv_sll_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sc_b.h b/riscv/insns/pv_sll_sc_b.h new file mode 100644 index 0000000000..d320519982 --- /dev/null +++ b/riscv/insns/pv_sll_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sc_h.h b/riscv/insns/pv_sll_sc_h.h new file mode 100644 index 0000000000..e84cf0214a --- /dev/null +++ b/riscv/insns/pv_sll_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sci_b.h b/riscv/insns/pv_sll_sci_b.h new file mode 100644 index 0000000000..8e637bea82 --- /dev/null +++ b/riscv/insns/pv_sll_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) << (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sll_sci_h.h b/riscv/insns/pv_sll_sci_h.h new file mode 100644 index 0000000000..ec94a2e288 --- /dev/null +++ b/riscv/insns/pv_sll_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) << (insn.p_simm6() & 
0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_b.h b/riscv/insns/pv_sra_b.h new file mode 100644 index 0000000000..9525a0afc1 --- /dev/null +++ b/riscv/insns/pv_sra_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_h.h b/riscv/insns/pv_sra_h.h new file mode 100644 index 0000000000..b3e8a0b944 --- /dev/null +++ b/riscv/insns/pv_sra_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sc_b.h b/riscv/insns/pv_sra_sc_b.h new file mode 100644 index 0000000000..9442d92804 --- /dev/null +++ b/riscv/insns/pv_sra_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sc_h.h b/riscv/insns/pv_sra_sc_h.h new file mode 100644 index 0000000000..1e012f750e --- /dev/null +++ b/riscv/insns/pv_sra_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sci_b.h b/riscv/insns/pv_sra_sci_b.h new file mode 100644 index 0000000000..3dafb3cb51 --- /dev/null +++ b/riscv/insns/pv_sra_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) >> 
(insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sra_sci_h.h b/riscv/insns/pv_sra_sci_h.h new file mode 100644 index 0000000000..4f56d0e5ee --- /dev/null +++ b/riscv/insns/pv_sra_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_b.h b/riscv/insns/pv_srl_b.h new file mode 100644 index 0000000000..37be2e23aa --- /dev/null +++ b/riscv/insns/pv_srl_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(i)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_h.h b/riscv/insns/pv_srl_h.h new file mode 100644 index 0000000000..1b35116d3b --- /dev/null +++ b/riscv/insns/pv_srl_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (zext16(RS2_H(i)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sc_b.h b/riscv/insns/pv_srl_sc_b.h new file mode 100644 index 0000000000..4b04ab6f74 --- /dev/null +++ b/riscv/insns/pv_srl_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (zext8(RS2_B(0)) & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sc_h.h b/riscv/insns/pv_srl_sc_h.h new file mode 100644 index 0000000000..f49f784db8 --- /dev/null +++ b/riscv/insns/pv_srl_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = 
zext16(RS1_H(i)) >> (zext16(RS2_H(0)) & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sci_b.h b/riscv/insns/pv_srl_sci_b.h new file mode 100644 index 0000000000..b0b38f2a90 --- /dev/null +++ b/riscv/insns/pv_srl_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = zext8(RS1_B(i)) >> (insn.p_simm6() & 0x07); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_srl_sci_h.h b/riscv/insns/pv_srl_sci_h.h new file mode 100644 index 0000000000..5aba29cc91 --- /dev/null +++ b/riscv/insns/pv_srl_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = zext16(RS1_H(i)) >> (insn.p_simm6() & 0x0F); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_b.h b/riscv/insns/pv_sub_b.h new file mode 100644 index 0000000000..2ce1fe224f --- /dev/null +++ b/riscv/insns/pv_sub_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(i)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_h.h b/riscv/insns/pv_sub_h.h new file mode 100644 index 0000000000..4ec5137268 --- /dev/null +++ b/riscv/insns/pv_sub_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(i)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_h_div2.h b/riscv/insns/pv_sub_h_div2.h new file mode 100644 index 0000000000..a5ea860a0a --- /dev/null +++ b/riscv/insns/pv_sub_h_div2.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union 
simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 1 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] - src2.h[i]; + res.h[i] = temp >> 1; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_sub_h_div4.h b/riscv/insns/pv_sub_h_div4.h new file mode 100644 index 0000000000..8615108286 --- /dev/null +++ b/riscv/insns/pv_sub_h_div4.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 2 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] +-src2.h[i]; + res.h[i] = temp >> 2; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_sub_h_div8.h b/riscv/insns/pv_sub_h_div8.h new file mode 100644 index 0000000000..027ea66f45 --- /dev/null +++ b/riscv/insns/pv_sub_h_div8.h @@ -0,0 +1,18 @@ +// Todo: explicit use of SIMD insns (ie xsimd lib) would be better + +union simd_reg src1 = {.reg = RS1}; +union simd_reg src2 = {.reg = RS2}; +union simd_reg res; + +int16_t temp; + +for(int i=0; i<(64/e16); i++) +{ + // (src1.h[i] - src2.h[i]) >> 3 doesn't work as shift + // will be performed in int32 (using overflows from add) + temp = src1.h[i] - src2.h[i]; + res.h[i] = temp >> 3; +} + +WRITE_RD(sext_xlen(res.sreg)); + diff --git a/riscv/insns/pv_sub_sc_b.h b/riscv/insns/pv_sub_sc_b.h new file mode 100644 index 0000000000..3375e64c24 --- /dev/null +++ b/riscv/insns/pv_sub_sc_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - sext8(RS2_B(0)); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git 
a/riscv/insns/pv_sub_sc_h.h b/riscv/insns/pv_sub_sc_h.h new file mode 100644 index 0000000000..4bb12839c4 --- /dev/null +++ b/riscv/insns/pv_sub_sc_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - sext16(RS2_H(0)); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sci_b.h b/riscv/insns/pv_sub_sci_b.h new file mode 100644 index 0000000000..20cc941239 --- /dev/null +++ b/riscv/insns/pv_sub_sci_b.h @@ -0,0 +1,9 @@ +int8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = sext8(RS1_B(i)) - insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_sub_sci_h.h b/riscv/insns/pv_sub_sci_h.h new file mode 100644 index 0000000000..50b11a6653 --- /dev/null +++ b/riscv/insns/pv_sub_sci_h.h @@ -0,0 +1,9 @@ +int16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = sext16(RS1_H(i)) - insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_subrotmj_h.h b/riscv/insns/pv_subrotmj_h.h new file mode 100644 index 0000000000..04289fcead --- /dev/null +++ b/riscv/insns/pv_subrotmj_h.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = imag1 - imag2; +uint16_t imag = real2 - real1; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div2.h b/riscv/insns/pv_subrotmj_h_div2.h new file mode 100644 index 0000000000..3e2d698e3d --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div2.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t 
imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 1; +uint16_t imag = (int16_t)(real2 - real1) >> 1; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div4.h b/riscv/insns/pv_subrotmj_h_div4.h new file mode 100644 index 0000000000..21e980869c --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div4.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 2; +uint16_t imag = (int16_t)(real2 - real1) >> 2; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_subrotmj_h_div8.h b/riscv/insns/pv_subrotmj_h_div8.h new file mode 100644 index 0000000000..13babf74db --- /dev/null +++ b/riscv/insns/pv_subrotmj_h_div8.h @@ -0,0 +1,16 @@ +reg_t src1 = RS1; +reg_t src2 = RS2; +reg_t res; + +int16_t real1 = src1; +int16_t imag1 = (src1 >> 16); +int16_t real2 = src2; +int16_t imag2 = (src2 >> 16); + +// uint16_t zero-extends to uint32_t, which is needed +uint16_t real = (int16_t)(imag1 - imag2) >> 3; +uint16_t imag = (int16_t)(real2 - real1) >> 3; +res = (imag << 16) | real; + +WRITE_RD(sext_xlen(res)); + diff --git a/riscv/insns/pv_xor_b.h b/riscv/insns/pv_xor_b.h new file mode 100644 index 0000000000..2fc203b4d6 --- /dev/null +++ b/riscv/insns/pv_xor_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(i); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_h.h b/riscv/insns/pv_xor_h.h new file mode 100644 index 0000000000..56cf0b7c9a --- /dev/null +++ b/riscv/insns/pv_xor_h.h @@ -0,0 +1,9 @@ +uint16_t temp; 
+uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(i); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sc_b.h b/riscv/insns/pv_xor_sc_b.h new file mode 100644 index 0000000000..ed3d5075ab --- /dev/null +++ b/riscv/insns/pv_xor_sc_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ RS2_B(0); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sc_h.h b/riscv/insns/pv_xor_sc_h.h new file mode 100644 index 0000000000..9d632f367b --- /dev/null +++ b/riscv/insns/pv_xor_sc_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ RS2_H(0); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sci_b.h b/riscv/insns/pv_xor_sci_b.h new file mode 100644 index 0000000000..7ecbf94fc4 --- /dev/null +++ b/riscv/insns/pv_xor_sci_b.h @@ -0,0 +1,9 @@ +uint8_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/8 - 1; i >= 0; i--){ + temp = RS1_B(i) ^ insn.p_simm6(); + simd_rd <<= 8; + simd_rd += (uint32_t)temp & 0x000000FF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/pv_xor_sci_h.h b/riscv/insns/pv_xor_sci_h.h new file mode 100644 index 0000000000..0a02ced601 --- /dev/null +++ b/riscv/insns/pv_xor_sci_h.h @@ -0,0 +1,9 @@ +uint16_t temp; +uint32_t simd_rd = 0; + +for(int i = xlen/16 - 1; i >= 0; i--){ + temp = RS1_H(i) ^ insn.p_simm6(); + simd_rd <<= 16; + simd_rd += (uint32_t)temp & 0x0000FFFF; +} +WRITE_RD(sext_xlen(simd_rd)); diff --git a/riscv/insns/sc_d.h b/riscv/insns/sc_d.h index aeeabd350d..54023ed449 100644 --- a/riscv/insns/sc_d.h +++ b/riscv/insns/sc_d.h @@ -1,11 +1,11 @@ require_extension('A'); require_rv64; -if (MMU.check_load_reservation(RS1)) -{ + +bool have_reservation = 
MMU.check_load_reservation(RS1, 8); + +if (have_reservation) MMU.store_uint64(RS1, RS2); - WRITE_RD(0); -} -else - WRITE_RD(1); MMU.yield_load_reservation(); + +WRITE_RD(!have_reservation); diff --git a/riscv/insns/sc_w.h b/riscv/insns/sc_w.h index 4b4be50584..e430dcb2e5 100644 --- a/riscv/insns/sc_w.h +++ b/riscv/insns/sc_w.h @@ -1,10 +1,10 @@ require_extension('A'); -if (MMU.check_load_reservation(RS1)) -{ + +bool have_reservation = MMU.check_load_reservation(RS1, 4); + +if (have_reservation) MMU.store_uint32(RS1, RS2); - WRITE_RD(0); -} -else - WRITE_RD(1); MMU.yield_load_reservation(); + +WRITE_RD(!have_reservation); diff --git a/riscv/insns/sfence_vma.h b/riscv/insns/sfence_vma.h index fc4625f0bf..ff949c7fad 100644 --- a/riscv/insns/sfence_vma.h +++ b/riscv/insns/sfence_vma.h @@ -1,2 +1,8 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +require_extension('S'); +if (STATE.v) { + if (STATE.prv == PRV_U || get_field(STATE.hstatus, HSTATUS_VTVM)) + require_novirt(); +} else { + require_privilege(get_field(STATE.mstatus, MSTATUS_TVM) ? PRV_M : PRV_S); +} MMU.flush_tlb(); diff --git a/riscv/insns/sret.h b/riscv/insns/sret.h index ae841de93f..315f4f0ec2 100644 --- a/riscv/insns/sret.h +++ b/riscv/insns/sret.h @@ -1,9 +1,20 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TSR) ? PRV_M : PRV_S); -set_pc_and_serialize(p->get_state()->sepc); +require_extension('S'); +if (STATE.v) { + if (STATE.prv == PRV_U || get_field(STATE.hstatus, HSTATUS_VTSR)) + require_novirt(); +} else { + require_privilege(get_field(STATE.mstatus, MSTATUS_TSR) ? PRV_M : PRV_S); +} +reg_t next_pc = (STATE.v) ? 
p->get_state()->vsepc : p->get_state()->sepc; +set_pc_and_serialize(next_pc); reg_t s = STATE.mstatus; reg_t prev_prv = get_field(s, MSTATUS_SPP); s = set_field(s, MSTATUS_SIE, get_field(s, MSTATUS_SPIE)); s = set_field(s, MSTATUS_SPIE, 1); s = set_field(s, MSTATUS_SPP, PRV_U); -p->set_privilege(prev_prv); p->set_csr(CSR_MSTATUS, s); +p->set_privilege(prev_prv); +if (!STATE.v) { + reg_t prev_virt = get_field(STATE.hstatus, HSTATUS_SPV); + p->set_virt(prev_virt); +} diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h new file mode 100644 index 0000000000..0a14467f61 --- /dev/null +++ b/riscv/insns/vaadd_vv.h @@ -0,0 +1,2 @@ +// vaadd.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, +, true); diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h new file mode 100644 index 0000000000..ae00d8e46c --- /dev/null +++ b/riscv/insns/vaadd_vx.h @@ -0,0 +1,2 @@ +// vaadd.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, +, false); diff --git a/riscv/insns/vaaddu_vv.h b/riscv/insns/vaaddu_vv.h new file mode 100644 index 0000000000..2f3fe745e7 --- /dev/null +++ b/riscv/insns/vaaddu_vv.h @@ -0,0 +1,2 @@ +// vaaddu.vv vd, vs2, vs1 +VI_VVX_ULOOP_AVG(vs1, +, true); diff --git a/riscv/insns/vaaddu_vx.h b/riscv/insns/vaaddu_vx.h new file mode 100644 index 0000000000..0e9fddcb1b --- /dev/null +++ b/riscv/insns/vaaddu_vx.h @@ -0,0 +1,2 @@ +// vaaddu.vx vd, vs2, rs1 +VI_VVX_ULOOP_AVG(rs1, +, false); diff --git a/riscv/insns/vadc_vim.h b/riscv/insns/vadc_vim.h new file mode 100644 index 0000000000..824fac970e --- /dev/null +++ b/riscv/insns/vadc_vim.h @@ -0,0 +1,10 @@ +// vadc.vim vd, vs2, simm5, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vvm.h b/riscv/insns/vadc_vvm.h new file mode 100644 index 0000000000..2d6803a8b4 --- /dev/null +++ b/riscv/insns/vadc_vvm.h @@ -0,0 +1,10 
@@ +// vadc.vvm vd, vs2, vs1, v0 +VI_VV_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadc_vxm.h b/riscv/insns/vadc_vxm.h new file mode 100644 index 0000000000..0d2d052cca --- /dev/null +++ b/riscv/insns/vadc_vxm.h @@ -0,0 +1,10 @@ +// vadc.vxm vd, vs2, rs1, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + vd = res; +}) diff --git a/riscv/insns/vadd_vi.h b/riscv/insns/vadd_vi.h new file mode 100644 index 0000000000..45fc6b74e7 --- /dev/null +++ b/riscv/insns/vadd_vi.h @@ -0,0 +1,5 @@ +// vadd.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 + vs2; +}) diff --git a/riscv/insns/vadd_vv.h b/riscv/insns/vadd_vv.h new file mode 100644 index 0000000000..45c6bdcba7 --- /dev/null +++ b/riscv/insns/vadd_vv.h @@ -0,0 +1,5 @@ +// vadd.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 + vs2; +}) diff --git a/riscv/insns/vadd_vx.h b/riscv/insns/vadd_vx.h new file mode 100644 index 0000000000..33e72ee495 --- /dev/null +++ b/riscv/insns/vadd_vx.h @@ -0,0 +1,5 @@ +// vadd.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 + vs2; +}) diff --git a/riscv/insns/vamoaddei16_v.h b/riscv/insns/vamoaddei16_v.h new file mode 100644 index 0000000000..3cb3db709d --- /dev/null +++ b/riscv/insns/vamoaddei16_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e16); diff --git a/riscv/insns/vamoaddei32_v.h b/riscv/insns/vamoaddei32_v.h new file mode 100644 index 0000000000..2bd77fcbd2 --- /dev/null +++ b/riscv/insns/vamoaddei32_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e32); diff --git a/riscv/insns/vamoaddei64_v.h b/riscv/insns/vamoaddei64_v.h 
new file mode 100644 index 0000000000..79ca748205 --- /dev/null +++ b/riscv/insns/vamoaddei64_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e64); diff --git a/riscv/insns/vamoaddei8_v.h b/riscv/insns/vamoaddei8_v.h new file mode 100644 index 0000000000..06b8c79302 --- /dev/null +++ b/riscv/insns/vamoaddei8_v.h @@ -0,0 +1,2 @@ +//vamoadde.v vd, (rs1), vs2, vd +VI_AMO({ return lhs + vs3; }, uint, e8); diff --git a/riscv/insns/vamoandei16_v.h b/riscv/insns/vamoandei16_v.h new file mode 100644 index 0000000000..be119497f3 --- /dev/null +++ b/riscv/insns/vamoandei16_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e16); diff --git a/riscv/insns/vamoandei32_v.h b/riscv/insns/vamoandei32_v.h new file mode 100644 index 0000000000..71506704ff --- /dev/null +++ b/riscv/insns/vamoandei32_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e32); diff --git a/riscv/insns/vamoandei64_v.h b/riscv/insns/vamoandei64_v.h new file mode 100644 index 0000000000..3efae3b59f --- /dev/null +++ b/riscv/insns/vamoandei64_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e64); diff --git a/riscv/insns/vamoandei8_v.h b/riscv/insns/vamoandei8_v.h new file mode 100644 index 0000000000..c47645d3e0 --- /dev/null +++ b/riscv/insns/vamoandei8_v.h @@ -0,0 +1,2 @@ +//vamoande.v vd, (rs1), vs2, vd +VI_AMO({ return lhs & vs3; }, uint, e8); diff --git a/riscv/insns/vamomaxei16_v.h b/riscv/insns/vamomaxei16_v.h new file mode 100644 index 0000000000..ca67893e99 --- /dev/null +++ b/riscv/insns/vamomaxei16_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? 
lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamomaxei32_v.h b/riscv/insns/vamomaxei32_v.h new file mode 100644 index 0000000000..b6823cd042 --- /dev/null +++ b/riscv/insns/vamomaxei32_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamomaxei64_v.h b/riscv/insns/vamomaxei64_v.h new file mode 100644 index 0000000000..46e8a3bbd1 --- /dev/null +++ b/riscv/insns/vamomaxei64_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamomaxei8_v.h b/riscv/insns/vamomaxei8_v.h new file mode 100644 index 0000000000..9697b3a4cb --- /dev/null +++ b/riscv/insns/vamomaxei8_v.h @@ -0,0 +1,2 @@ +//vamomaxe.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamomaxuei16_v.h b/riscv/insns/vamomaxuei16_v.h new file mode 100644 index 0000000000..e05971dfcf --- /dev/null +++ b/riscv/insns/vamomaxuei16_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamomaxuei32_v.h b/riscv/insns/vamomaxuei32_v.h new file mode 100644 index 0000000000..9b873543b9 --- /dev/null +++ b/riscv/insns/vamomaxuei32_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamomaxuei64_v.h b/riscv/insns/vamomaxuei64_v.h new file mode 100644 index 0000000000..bbfbc9f2a3 --- /dev/null +++ b/riscv/insns/vamomaxuei64_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamomaxuei8_v.h b/riscv/insns/vamomaxuei8_v.h new file mode 100644 index 0000000000..357ba2454a --- /dev/null +++ b/riscv/insns/vamomaxuei8_v.h @@ -0,0 +1,2 @@ +//vamomaxue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs >= vs3 ? 
lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamominei16_v.h b/riscv/insns/vamominei16_v.h new file mode 100644 index 0000000000..9d1ecac643 --- /dev/null +++ b/riscv/insns/vamominei16_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e16); diff --git a/riscv/insns/vamominei32_v.h b/riscv/insns/vamominei32_v.h new file mode 100644 index 0000000000..6cb8475e39 --- /dev/null +++ b/riscv/insns/vamominei32_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e32); diff --git a/riscv/insns/vamominei64_v.h b/riscv/insns/vamominei64_v.h new file mode 100644 index 0000000000..9ef3d4ee3b --- /dev/null +++ b/riscv/insns/vamominei64_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e64); diff --git a/riscv/insns/vamominei8_v.h b/riscv/insns/vamominei8_v.h new file mode 100644 index 0000000000..5c035ea47b --- /dev/null +++ b/riscv/insns/vamominei8_v.h @@ -0,0 +1,2 @@ +//vamomine.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3; }, int, e8); diff --git a/riscv/insns/vamominuei16_v.h b/riscv/insns/vamominuei16_v.h new file mode 100644 index 0000000000..d4a8f89292 --- /dev/null +++ b/riscv/insns/vamominuei16_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e16); diff --git a/riscv/insns/vamominuei32_v.h b/riscv/insns/vamominuei32_v.h new file mode 100644 index 0000000000..16296c5beb --- /dev/null +++ b/riscv/insns/vamominuei32_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e32); diff --git a/riscv/insns/vamominuei64_v.h b/riscv/insns/vamominuei64_v.h new file mode 100644 index 0000000000..fd850fd063 --- /dev/null +++ b/riscv/insns/vamominuei64_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? 
lhs : vs3;; }, uint, e64); diff --git a/riscv/insns/vamominuei8_v.h b/riscv/insns/vamominuei8_v.h new file mode 100644 index 0000000000..3749d0525d --- /dev/null +++ b/riscv/insns/vamominuei8_v.h @@ -0,0 +1,2 @@ +//vamominue.v vd, (rs1), vs2, vd +VI_AMO({ return lhs < vs3 ? lhs : vs3;; }, uint, e8); diff --git a/riscv/insns/vamoorei16_v.h b/riscv/insns/vamoorei16_v.h new file mode 100644 index 0000000000..a5ba1caa74 --- /dev/null +++ b/riscv/insns/vamoorei16_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e16); diff --git a/riscv/insns/vamoorei32_v.h b/riscv/insns/vamoorei32_v.h new file mode 100644 index 0000000000..94e4458e49 --- /dev/null +++ b/riscv/insns/vamoorei32_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e32); diff --git a/riscv/insns/vamoorei64_v.h b/riscv/insns/vamoorei64_v.h new file mode 100644 index 0000000000..84e03944e5 --- /dev/null +++ b/riscv/insns/vamoorei64_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e64); diff --git a/riscv/insns/vamoorei8_v.h b/riscv/insns/vamoorei8_v.h new file mode 100644 index 0000000000..364035dbb2 --- /dev/null +++ b/riscv/insns/vamoorei8_v.h @@ -0,0 +1,2 @@ +//vamoore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs | vs3; }, uint, e8); diff --git a/riscv/insns/vamoswapei16_v.h b/riscv/insns/vamoswapei16_v.h new file mode 100644 index 0000000000..31ff021030 --- /dev/null +++ b/riscv/insns/vamoswapei16_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e16); diff --git a/riscv/insns/vamoswapei32_v.h b/riscv/insns/vamoswapei32_v.h new file mode 100644 index 0000000000..a5741929ab --- /dev/null +++ b/riscv/insns/vamoswapei32_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e32); diff --git a/riscv/insns/vamoswapei64_v.h b/riscv/insns/vamoswapei64_v.h new file mode 100644 index 0000000000..58bd035217 --- /dev/null +++ 
b/riscv/insns/vamoswapei64_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e64); diff --git a/riscv/insns/vamoswapei8_v.h b/riscv/insns/vamoswapei8_v.h new file mode 100644 index 0000000000..af37c8c3f8 --- /dev/null +++ b/riscv/insns/vamoswapei8_v.h @@ -0,0 +1,2 @@ +//vamoswape.v vd, (rs1), vs2, vd +VI_AMO({ return vs3; }, uint, e8); diff --git a/riscv/insns/vamoxorei16_v.h b/riscv/insns/vamoxorei16_v.h new file mode 100644 index 0000000000..61e8c3272c --- /dev/null +++ b/riscv/insns/vamoxorei16_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e16); diff --git a/riscv/insns/vamoxorei32_v.h b/riscv/insns/vamoxorei32_v.h new file mode 100644 index 0000000000..d48d951504 --- /dev/null +++ b/riscv/insns/vamoxorei32_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e32); diff --git a/riscv/insns/vamoxorei64_v.h b/riscv/insns/vamoxorei64_v.h new file mode 100644 index 0000000000..f7a3ca42e1 --- /dev/null +++ b/riscv/insns/vamoxorei64_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e64); diff --git a/riscv/insns/vamoxorei8_v.h b/riscv/insns/vamoxorei8_v.h new file mode 100644 index 0000000000..4b6c79824c --- /dev/null +++ b/riscv/insns/vamoxorei8_v.h @@ -0,0 +1,2 @@ +//vamoxore.v vd, (rs1), vs2, vd +VI_AMO({ return lhs ^ vs3; }, uint, e8); diff --git a/riscv/insns/vand_vi.h b/riscv/insns/vand_vi.h new file mode 100644 index 0000000000..dd9618ba94 --- /dev/null +++ b/riscv/insns/vand_vi.h @@ -0,0 +1,5 @@ +// vand.vi vd, simm5, vs2, vm +VI_VI_LOOP +({ + vd = simm5 & vs2; +}) diff --git a/riscv/insns/vand_vv.h b/riscv/insns/vand_vv.h new file mode 100644 index 0000000000..65558e4b6a --- /dev/null +++ b/riscv/insns/vand_vv.h @@ -0,0 +1,5 @@ +// vand.vv vd, vs1, vs2, vm +VI_VV_LOOP +({ + vd = vs1 & vs2; +}) diff --git a/riscv/insns/vand_vx.h b/riscv/insns/vand_vx.h new file mode 100644 index 0000000000..8eea1ed526 --- 
/dev/null +++ b/riscv/insns/vand_vx.h @@ -0,0 +1,5 @@ +// vand.vx vd, rs1, vs2, vm +VI_VX_LOOP +({ + vd = rs1 & vs2; +}) diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h new file mode 100644 index 0000000000..a45c18db7d --- /dev/null +++ b/riscv/insns/vasub_vv.h @@ -0,0 +1,2 @@ +// vasub.vv vd, vs2, vs1 +VI_VVX_LOOP_AVG(vs1, -, true); diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h new file mode 100644 index 0000000000..4e8dba1c9a --- /dev/null +++ b/riscv/insns/vasub_vx.h @@ -0,0 +1,2 @@ +// vasub.vx vd, vs2, rs1 +VI_VVX_LOOP_AVG(rs1, -, false); diff --git a/riscv/insns/vasubu_vv.h b/riscv/insns/vasubu_vv.h new file mode 100644 index 0000000000..8e2be01aca --- /dev/null +++ b/riscv/insns/vasubu_vv.h @@ -0,0 +1,2 @@ +// vasubu.vv vd, vs2, vs1 +VI_VVX_ULOOP_AVG(vs1, -, true); diff --git a/riscv/insns/vasubu_vx.h b/riscv/insns/vasubu_vx.h new file mode 100644 index 0000000000..3cc9ca8a01 --- /dev/null +++ b/riscv/insns/vasubu_vx.h @@ -0,0 +1,2 @@ +// vasubu.vx vd, vs2, rs1 +VI_VVX_ULOOP_AVG(rs1, -, false); diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h new file mode 100644 index 0000000000..325e40adb4 --- /dev/null +++ b/riscv/insns/vcompress_vm.h @@ -0,0 +1,33 @@ +// vcompress vd, vs2, vs1 +require(P.VU.vstart == 0); +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1); + +reg_t pos = 0; + +VI_GENERAL_LOOP_BASE + const int midx = i / 64; + const int mpos = i % 64; + + bool do_mask = (P.VU.elt(rs1_num, midx) >> mpos) & 0x1; + if (do_mask) { + switch (sew) { + case e8: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + case e16: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + case e32: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + default: + P.VU.elt(rd_num, pos, true) = P.VU.elt(rs2_num, i); + break; + } + + ++pos; + } +VI_LOOP_END; diff --git 
a/riscv/insns/vdiv_vv.h b/riscv/insns/vdiv_vv.h new file mode 100644 index 0000000000..0d4bd0d8e4 --- /dev/null +++ b/riscv/insns/vdiv_vv.h @@ -0,0 +1,10 @@ +// vdiv.vv vd, vs2, vs1 +VI_VV_LOOP +({ + if (vs1 == 0) + vd = -1; + else if (vs2 == (INT64_MIN >> (64 - sew)) && vs1 == -1) + vd = vs2; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdiv_vx.h b/riscv/insns/vdiv_vx.h new file mode 100644 index 0000000000..405295270e --- /dev/null +++ b/riscv/insns/vdiv_vx.h @@ -0,0 +1,10 @@ +// vdiv.vx vd, vs2, rs1 +VI_VX_LOOP +({ + if(rs1 == 0) + vd = -1; + else if(vs2 == (INT64_MIN >> (64 - sew)) && rs1 == -1) + vd = vs2; + else + vd = vs2 / rs1; +}) diff --git a/riscv/insns/vdivu_vv.h b/riscv/insns/vdivu_vv.h new file mode 100644 index 0000000000..ef6e777d6b --- /dev/null +++ b/riscv/insns/vdivu_vv.h @@ -0,0 +1,8 @@ +// vdivu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + if(vs1 == 0) + vd = -1; + else + vd = vs2 / vs1; +}) diff --git a/riscv/insns/vdivu_vx.h b/riscv/insns/vdivu_vx.h new file mode 100644 index 0000000000..7ffe1c6803 --- /dev/null +++ b/riscv/insns/vdivu_vx.h @@ -0,0 +1,8 @@ +// vdivu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + if(rs1 == 0) + vd = -1; + else + vd = vs2 / rs1; +}) diff --git a/riscv/insns/vdot_vv.h b/riscv/insns/vdot_vv.h new file mode 100644 index 0000000000..7685230497 --- /dev/null +++ b/riscv/insns/vdot_vv.h @@ -0,0 +1,5 @@ +// vdot vd, vs2, vs1 +VI_VV_LOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vdotu_vv.h b/riscv/insns/vdotu_vv.h new file mode 100644 index 0000000000..9c4c59dde2 --- /dev/null +++ b/riscv/insns/vdotu_vv.h @@ -0,0 +1,5 @@ +// vdotu vd, vs2, vs1 +VI_VV_ULOOP +({ + vd += vs2 * vs1; +}) diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h new file mode 100644 index 0000000000..2b808e0ccd --- /dev/null +++ b/riscv/insns/vfadd_vf.h @@ -0,0 +1,11 @@ +// vfadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_add(rs1, vs2); +}, +{ + vd = f32_add(rs1, vs2); +}, +{ + vd = f64_add(rs1, vs2); +}) diff --git 
a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h new file mode 100644 index 0000000000..ce94921d56 --- /dev/null +++ b/riscv/insns/vfadd_vv.h @@ -0,0 +1,11 @@ +// vfadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_add(vs1, vs2); +}, +{ + vd = f32_add(vs1, vs2); +}, +{ + vd = f64_add(vs1, vs2); +}) diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h new file mode 100644 index 0000000000..658f28a23f --- /dev/null +++ b/riscv/insns/vfclass_v.h @@ -0,0 +1,11 @@ +// vfclass.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd.v = f16_classify(vs2); +}, +{ + vd.v = f32_classify(vs2); +}, +{ + vd.v = f64_classify(vs2); +}) diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h new file mode 100644 index 0000000000..c53b0e1fd6 --- /dev/null +++ b/riscv/insns/vfcvt_f_x_v.h @@ -0,0 +1,14 @@ +// vfcvt.f.x.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f16(vs2_i); +}, +{ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f32(vs2_i); +}, +{ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i64_to_f64(vs2_i); +}) diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h new file mode 100644 index 0000000000..bd03768dbd --- /dev/null +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -0,0 +1,14 @@ +// vfcvt.f.xu.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f16(vs2_u); +}, +{ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f32(vs2_u); +}, +{ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui64_to_f64(vs2_u); +}) diff --git a/riscv/insns/vfcvt_rtz_x_f_v.h b/riscv/insns/vfcvt_rtz_x_f_v.h new file mode 100644 index 0000000000..e7241bd033 --- /dev/null +++ b/riscv/insns/vfcvt_rtz_x_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_i64(vs2, softfloat_round_minMag, true); +}) diff --git 
a/riscv/insns/vfcvt_rtz_xu_f_v.h b/riscv/insns/vfcvt_rtz_xu_f_v.h new file mode 100644 index 0000000000..d3d266d0c9 --- /dev/null +++ b/riscv/insns/vfcvt_rtz_xu_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.rtz.xu.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_ui64(vs2, softfloat_round_minMag, true); +}) diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h new file mode 100644 index 0000000000..01e5ca17f4 --- /dev/null +++ b/riscv/insns/vfcvt_x_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.x.f.v vd, vs2, vm +VI_VFP_VF_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_i64(vs2, STATE.frm, true); +}) diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h new file mode 100644 index 0000000000..725cbda23a --- /dev/null +++ b/riscv/insns/vfcvt_xu_f_v.h @@ -0,0 +1,11 @@ +// vfcvt.xu.f.v vd, vs2, vm +VI_VFP_VV_LOOP +({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_ui64(vs2, STATE.frm, true); +}) diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h new file mode 100644 index 0000000000..a703ef02bf --- /dev/null +++ b/riscv/insns/vfdiv_vf.h @@ -0,0 +1,11 @@ +// vfdiv.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_div(vs2, rs1); +}, +{ + vd = f32_div(vs2, rs1); +}, +{ + vd = f64_div(vs2, rs1); +}) diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h new file mode 100644 index 0000000000..c66d751659 --- /dev/null +++ b/riscv/insns/vfdiv_vv.h @@ -0,0 +1,11 @@ +// vfdiv.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_div(vs2, vs1); +}, +{ + vd = f32_div(vs2, vs1); +}, +{ + vd = f64_div(vs2, vs1); +}) diff --git a/riscv/insns/vfdot_vv.h 
b/riscv/insns/vfdot_vv.h new file mode 100644 index 0000000000..8f5225acd4 --- /dev/null +++ b/riscv/insns/vfdot_vv.h @@ -0,0 +1,11 @@ +// vfdot.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_add(vd, f16_mul(vs2, vs1)); +}, +{ + vd = f32_add(vd, f32_mul(vs2, vs1)); +}, +{ + vd = f64_add(vd, f64_mul(vs2, vs1)); +}) diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h new file mode 100644 index 0000000000..309572374d --- /dev/null +++ b/riscv/insns/vfirst_m.h @@ -0,0 +1,20 @@ +// vfirst.m rd, vs2 +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +reg_t pos = -1; +for (reg_t i=P.VU.vstart; i < vl; ++i) { + VI_LOOP_ELEMENT_SKIP() + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + if (vs2_lsb) { + pos = i; + break; + } +} +P.VU.vstart = 0; +WRITE_RD(pos); diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h new file mode 100644 index 0000000000..61578d3318 --- /dev/null +++ b/riscv/insns/vfmacc_vf.h @@ -0,0 +1,11 @@ +// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * f[rs1]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, vs2, vd); +}, +{ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ + vd = f64_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h new file mode 100644 index 0000000000..499b1d4d22 --- /dev/null +++ b/riscv/insns/vfmacc_vv.h @@ -0,0 +1,11 @@ +// vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vs1, vs2, vd); +}, +{ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ + vd = f64_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h new file mode 100644 index 0000000000..2a01429506 --- /dev/null +++ b/riscv/insns/vfmadd_vf.h @@ -0,0 +1,11 @@ +// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(vd, rs1, vs2); +}, +{ + vd = f32_mulAdd(vd, 
rs1, vs2); +}, +{ + vd = f64_mulAdd(vd, rs1, vs2); +}) diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h new file mode 100644 index 0000000000..7ef734f847 --- /dev/null +++ b/riscv/insns/vfmadd_vv.h @@ -0,0 +1,11 @@ +// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vd, vs1, vs2); +}, +{ + vd = f32_mulAdd(vd, vs1, vs2); +}, +{ + vd = f64_mulAdd(vd, vs1, vs2); +}) diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h new file mode 100644 index 0000000000..c4b74cbd54 --- /dev/null +++ b/riscv/insns/vfmax_vf.h @@ -0,0 +1,11 @@ +// vfmax +VI_VFP_VF_LOOP +({ + vd = f16_max(vs2, rs1); +}, +{ + vd = f32_max(vs2, rs1); +}, +{ + vd = f64_max(vs2, rs1); +}) diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h new file mode 100644 index 0000000000..6439c8997f --- /dev/null +++ b/riscv/insns/vfmax_vv.h @@ -0,0 +1,11 @@ +// vfmax +VI_VFP_VV_LOOP +({ + vd = f16_max(vs2, vs1); +}, +{ + vd = f32_max(vs2, vs1); +}, +{ + vd = f64_max(vs2, vs1); +}) diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h new file mode 100644 index 0000000000..c9b39fe052 --- /dev/null +++ b/riscv/insns/vfmerge_vfm.h @@ -0,0 +1,50 @@ +// vfmerge_vf vd, vs2, vs1, vm +VI_CHECK_SSS(false); +VI_VFP_COMMON; + +switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + case e32: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f32(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? 
rs1 : vs2; + } + break; + case e64: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f64(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + default: + require(0); + break; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h new file mode 100644 index 0000000000..1560cdf7dc --- /dev/null +++ b/riscv/insns/vfmin_vf.h @@ -0,0 +1,11 @@ +// vfmin vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_min(vs2, rs1); +}, +{ + vd = f32_min(vs2, rs1); +}, +{ + vd = f64_min(vs2, rs1); +}) diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h new file mode 100644 index 0000000000..882a774044 --- /dev/null +++ b/riscv/insns/vfmin_vv.h @@ -0,0 +1,11 @@ +// vfmin vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_min(vs2, vs1); +}, +{ + vd = f32_min(vs2, vs1); +}, +{ + vd = f64_min(vs2, vs1); +}) diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h new file mode 100644 index 0000000000..8af397b999 --- /dev/null +++ b/riscv/insns/vfmsac_vf.h @@ -0,0 +1,11 @@ +// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h new file mode 100644 index 0000000000..3bb50e50a9 --- /dev/null +++ b/riscv/insns/vfmsac_vv.h @@ -0,0 +1,11 @@ +// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h new file mode 100644 index 0000000000..ab77b4c6e1 --- /dev/null +++ b/riscv/insns/vfmsub_vf.h @@ -0,0 +1,11 @@ +// vfmsub: vd[i] = 
+(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, rs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h new file mode 100644 index 0000000000..3cac937fd1 --- /dev/null +++ b/riscv/insns/vfmsub_vv.h @@ -0,0 +1,11 @@ +// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, vs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h new file mode 100644 index 0000000000..f5f63e497a --- /dev/null +++ b/riscv/insns/vfmul_vf.h @@ -0,0 +1,11 @@ +// vfmul.vf vd, vs2, rs1, vm +VI_VFP_VF_LOOP +({ + vd = f16_mul(vs2, rs1); +}, +{ + vd = f32_mul(vs2, rs1); +}, +{ + vd = f64_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h new file mode 100644 index 0000000000..7930fd034e --- /dev/null +++ b/riscv/insns/vfmul_vv.h @@ -0,0 +1,11 @@ +// vfmul.vv vd, vs1, vs2, vm +VI_VFP_VV_LOOP +({ + vd = f16_mul(vs1, vs2); +}, +{ + vd = f32_mul(vs1, vs2); +}, +{ + vd = f64_mul(vs1, vs2); +}) diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h new file mode 100644 index 0000000000..4a81436c10 --- /dev/null +++ b/riscv/insns/vfmv_f_s.h @@ -0,0 +1,38 @@ +// vfmv_f_s: rd = vs2[0] (rs1=0) +require_vector(true); +require_fp; +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); +require(STATE.frm < 0x5); + +reg_t rs2_num = insn.rs2(); +uint64_t vs2_0 = 0; +const reg_t sew = P.VU.vsew; +switch(sew) { + case e16: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e32: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e64: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + default: + require(0); + break; +} + 
+// NaN-extend: when FLEN > sew, set every bit above the element width +if (FLEN > sew) { + vs2_0 = vs2_0 | (UINT64_MAX << sew); +} + +if (FLEN == 64) { + WRITE_FRD(f64(vs2_0)); +} else { + WRITE_FRD(f32(vs2_0)); +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h new file mode 100644 index 0000000000..52ed7b2d11 --- /dev/null +++ b/riscv/insns/vfmv_s_f.h @@ -0,0 +1,29 @@ +// vfmv_s_f: vd[0] = rs1 (vs2=0) +require_vector(true); +require_fp; +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); +require(STATE.frm < 0x5); + +reg_t vl = P.VU.vl; + +if (vl > 0 && P.VU.vstart < vl) { + reg_t rd_num = insn.rd(); + + switch(P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1).v; + break; + case e32: + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + case e64: + if (FLEN == 64) + P.VU.elt(rd_num, 0, true) = f64(FRS1).v; + else + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + } +} +P.VU.vstart = 0; diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h new file mode 100644 index 0000000000..fb9c78827e --- /dev/null +++ b/riscv/insns/vfmv_v_f.h @@ -0,0 +1,31 @@ +// vfmv.v.f vd, rs1 +require_align(insn.rd(), P.VU.vflmul); +VI_VFP_COMMON +switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + + vd = rs1; + } + break; + case e32: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f32(READ_FREG(rs1_num)); + + vd = rs1; + } + break; + case e64: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f64(READ_FREG(rs1_num)); + + vd = rs1; + } + break; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h new file mode 100644 index 0000000000..e9f3b25746 --- /dev/null +++ b/riscv/insns/vfncvt_f_f_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = 
f32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h new file mode 100644 index 0000000000..556ee3c878 --- /dev/null +++ b/riscv/insns/vfncvt_f_x_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.x.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h new file mode 100644 index 0000000000..0626ecb4ad --- /dev/null +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -0,0 +1,23 @@ +// vfncvt.f.xu.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h new file mode 100644 index 0000000000..7113fd572a --- /dev/null +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -0,0 +1,25 @@ +// vfncvt.rod.f.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, 
+false, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h new file mode 100644 index 0000000000..1dfc6ecea4 --- /dev/null +++ b/riscv/insns/vfncvt_rtz_x_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.rtz.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i8(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h new file mode 100644 index 0000000000..c6adcec9ab --- /dev/null +++ b/riscv/insns/vfncvt_rtz_xu_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.rtz.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui8(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h new file mode 100644 index 0000000000..01b52a2af5 --- /dev/null +++ b/riscv/insns/vfncvt_x_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i8(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, 
STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, STATE.frm, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h new file mode 100644 index 0000000000..bb55ec318d --- /dev/null +++ b/riscv/insns/vfncvt_xu_f_w.h @@ -0,0 +1,24 @@ +// vfncvt.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui8(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm, true); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +false, (P.VU.vsew <= 32)) diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h new file mode 100644 index 0000000000..1b99302c6c --- /dev/null +++ b/riscv/insns/vfnmacc_vf.h @@ -0,0 +1,11 @@ +// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h new file mode 100644 index 0000000000..7200e063ab --- /dev/null +++ b/riscv/insns/vfnmacc_vv.h @@ -0,0 +1,11 @@ +// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vs2.v ^ F64_SIGN), vs1, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vf.h 
b/riscv/insns/vfnmadd_vf.h new file mode 100644 index 0000000000..cb9c217ff2 --- /dev/null +++ b/riscv/insns/vfnmadd_vf.h @@ -0,0 +1,11 @@ +// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h new file mode 100644 index 0000000000..7160ed7d6f --- /dev/null +++ b/riscv/insns/vfnmadd_vv.h @@ -0,0 +1,11 @@ +// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, f64(vs2.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h new file mode 100644 index 0000000000..aa6baa30c6 --- /dev/null +++ b/riscv/insns/vfnmsac_vf.h @@ -0,0 +1,11 @@ +// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); +}, +{ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), vd); +}) diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h new file mode 100644 index 0000000000..47db61d2d0 --- /dev/null +++ b/riscv/insns/vfnmsac_vv.h @@ -0,0 +1,11 @@ +// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd); +}, +{ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h new file mode 100644 index 0000000000..43aa9e2685 --- /dev/null +++ b/riscv/insns/vfnmsub_vf.h @@ -0,0 +1,11 @@ +// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ 
+ vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, vs2); +}) diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h new file mode 100644 index 0000000000..2a45c8fca0 --- /dev/null +++ b/riscv/insns/vfnmsub_vv.h @@ -0,0 +1,11 @@ +// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2); +}, +{ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, vs2); +}) diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h new file mode 100644 index 0000000000..b283343cc2 --- /dev/null +++ b/riscv/insns/vfrdiv_vf.h @@ -0,0 +1,11 @@ +// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] +VI_VFP_VF_LOOP +({ + vd = f16_div(rs1, vs2); +}, +{ + vd = f32_div(rs1, vs2); +}, +{ + vd = f64_div(rs1, vs2); +}) diff --git a/riscv/insns/vfrece7_v.h b/riscv/insns/vfrece7_v.h new file mode 100644 index 0000000000..69c026b058 --- /dev/null +++ b/riscv/insns/vfrece7_v.h @@ -0,0 +1,11 @@ +// vfrece7.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd = f16_recip7(vs2); +}, +{ + vd = f32_recip7(vs2); +}, +{ + vd = f64_recip7(vs2); +}) diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h new file mode 100644 index 0000000000..f19ec59791 --- /dev/null +++ b/riscv/insns/vfredmax_vs.h @@ -0,0 +1,12 @@ +// vfredmax vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_max(vd_0, vs2); +}, +{ + vd_0 = f32_max(vd_0, vs2); +}, +{ + vd_0 = f64_max(vd_0, vs2); +}) diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h new file mode 100644 index 0000000000..e3cf151324 --- /dev/null +++ b/riscv/insns/vfredmin_vs.h @@ -0,0 +1,12 @@ +// vfredmin vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_min(vd_0, vs2); +}, +{ + vd_0 = f32_min(vd_0, vs2); +}, +{ + vd_0 = f64_min(vd_0, vs2); +}) diff 
--git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h new file mode 100644 index 0000000000..2438a7ba9c --- /dev/null +++ b/riscv/insns/vfredosum_vs.h @@ -0,0 +1,12 @@ +// vfredosum: vd[0] = sum( vs2[*] , vs1[0] ) +bool is_propagate = false; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_add(vd_0, vs2); +}, +{ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h new file mode 100644 index 0000000000..bad7308e54 --- /dev/null +++ b/riscv/insns/vfredsum_vs.h @@ -0,0 +1,12 @@ +// vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) +bool is_propagate = true; +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f16_add(vd_0, vs2); +}, +{ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfrsqrte7_v.h b/riscv/insns/vfrsqrte7_v.h new file mode 100644 index 0000000000..a073764127 --- /dev/null +++ b/riscv/insns/vfrsqrte7_v.h @@ -0,0 +1,11 @@ +// vfrsqrte7.v vd, vs2, vm +VI_VFP_V_LOOP +({ + vd = f16_rsqrte7(vs2); +}, +{ + vd = f32_rsqrte7(vs2); +}, +{ + vd = f64_rsqrte7(vs2); +}) diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h new file mode 100644 index 0000000000..7fb26a5b5d --- /dev/null +++ b/riscv/insns/vfrsub_vf.h @@ -0,0 +1,11 @@ +// vfrsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_sub(rs1, vs2); +}, +{ + vd = f32_sub(rs1, vs2); +}, +{ + vd = f64_sub(rs1, vs2); +}) diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h new file mode 100644 index 0000000000..ce06185ee9 --- /dev/null +++ b/riscv/insns/vfsgnj_vf.h @@ -0,0 +1,11 @@ +// vfsgnj vd, vs2, vs1 +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, false, false); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, false); +}) diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h new file mode 100644 index 0000000000..722cb29cf0 --- /dev/null +++ b/riscv/insns/vfsgnj_vv.h @@ -0,0 +1,11 @@ +// vfsgnj +VI_VFP_VV_LOOP +({ + vd = 
fsgnj16(vs2.v, vs1.v, false, false); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, false); +}) diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h new file mode 100644 index 0000000000..e4894124aa --- /dev/null +++ b/riscv/insns/vfsgnjn_vf.h @@ -0,0 +1,11 @@ +// vfsgnn +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, true, false); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, true, false); +}) diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h new file mode 100644 index 0000000000..1d91f69199 --- /dev/null +++ b/riscv/insns/vfsgnjn_vv.h @@ -0,0 +1,11 @@ +// vfsgnn +VI_VFP_VV_LOOP +({ + vd = fsgnj16(vs2.v, vs1.v, true, false); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, true, false); +}) diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h new file mode 100644 index 0000000000..7be164c770 --- /dev/null +++ b/riscv/insns/vfsgnjx_vf.h @@ -0,0 +1,11 @@ +// vfsgnx +VI_VFP_VF_LOOP +({ + vd = fsgnj16(vs2.v, rs1.v, false, true); +}, +{ + vd = fsgnj32(vs2.v, rs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, true); +}) diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h new file mode 100644 index 0000000000..b04b8454ac --- /dev/null +++ b/riscv/insns/vfsgnjx_vv.h @@ -0,0 +1,11 @@ +// vfsgnx +VI_VFP_VV_LOOP +({ + vd = fsgnj16(vs2.v, vs1.v, false, true); +}, +{ + vd = fsgnj32(vs2.v, vs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, true); +}) diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h new file mode 100644 index 0000000000..66eeaccbf4 --- /dev/null +++ b/riscv/insns/vfslide1down_vf.h @@ -0,0 +1,36 @@ +//vfslide1down.vf vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +VI_VFP_LOOP_BASE +if (i != vl - 1) { + switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, 1); + vd 
= vs2; + } + break; + case e64: { + VI_XI_SLIDEDOWN_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, vl - 1, true) = f16(FRS1); + break; + case e32: + P.VU.elt(rd_num, vl - 1, true) = f32(FRS1); + break; + case e64: + P.VU.elt(rd_num, vl - 1, true) = f64(FRS1); + break; + } +} +VI_VFP_LOOP_END diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h new file mode 100644 index 0000000000..b9c2817c28 --- /dev/null +++ b/riscv/insns/vfslide1up_vf.h @@ -0,0 +1,36 @@ +//vfslide1up.vf vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +VI_VFP_LOOP_BASE +if (i != 0) { + switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEUP_PARAMS(e32, 1); + vd = vs2; + } + break; + case e64: { + VI_XI_SLIDEUP_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1); + break; + case e32: + P.VU.elt(rd_num, 0, true) = f32(FRS1); + break; + case e64: + P.VU.elt(rd_num, 0, true) = f64(FRS1); + break; + } +} +VI_VFP_LOOP_END diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h new file mode 100644 index 0000000000..86f0148d4b --- /dev/null +++ b/riscv/insns/vfsqrt_v.h @@ -0,0 +1,11 @@ +// vsqrt.v vd, vd2, vm +VI_VFP_V_LOOP +({ + vd = f16_sqrt(vs2); +}, +{ + vd = f32_sqrt(vs2); +}, +{ + vd = f64_sqrt(vs2); +}) diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h new file mode 100644 index 0000000000..fc6877ca5a --- /dev/null +++ b/riscv/insns/vfsub_vf.h @@ -0,0 +1,11 @@ +// vfsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f16_sub(vs2, rs1); +}, +{ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h new file mode 100644 index 0000000000..b0403f1180 --- /dev/null +++ b/riscv/insns/vfsub_vv.h @@ -0,0 +1,11 @@ +// vfsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f16_sub(vs2, vs1); +}, +{ + vd = f32_sub(vs2, vs1); 
+}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h new file mode 100644 index 0000000000..b8249001e0 --- /dev/null +++ b/riscv/insns/vfwadd_vf.h @@ -0,0 +1,8 @@ +// vfwadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_add(vs2, rs1); +}, +{ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h new file mode 100644 index 0000000000..7255a50e13 --- /dev/null +++ b/riscv/insns/vfwadd_vv.h @@ -0,0 +1,8 @@ +// vfwadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_add(vs2, vs1); +}, +{ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h new file mode 100644 index 0000000000..021b17f049 --- /dev/null +++ b/riscv/insns/vfwadd_wf.h @@ -0,0 +1,8 @@ +// vfwadd.wf vd, vs2, vs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f32_add(vs2, rs1); +}, +{ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h new file mode 100644 index 0000000000..c1ed038925 --- /dev/null +++ b/riscv/insns/vfwadd_wv.h @@ -0,0 +1,8 @@ +// vfwadd.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f32_add(vs2, vs1); +}, +{ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h new file mode 100644 index 0000000000..9bf3f386e3 --- /dev/null +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.f.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f64(vs2); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h new file mode 100644 index 0000000000..481f37122a --- /dev/null +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -0,0 +1,24 @@ +// vfwcvt.f.x.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = 
P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f64(vs2); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 8)) diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h new file mode 100644 index 0000000000..544f33dd4b --- /dev/null +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -0,0 +1,24 @@ +// vfwcvt.f.xu.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f32(vs2); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f64(vs2); +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +{ + require(p->supports_extension('D')); +}, +true, (P.VU.vsew >= 8)) diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h new file mode 100644 index 0000000000..7cbcf3116e --- /dev/null +++ b/riscv/insns/vfwcvt_rtz_x_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, softfloat_round_minMag, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h new file mode 100644 index 0000000000..81be047a3c --- /dev/null +++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.rtz,xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = 
P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, softfloat_round_minMag, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h new file mode 100644 index 0000000000..ebd99c0cd1 --- /dev/null +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, STATE.frm, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h new file mode 100644 index 0000000000..55036f6c55 --- /dev/null +++ b/riscv/insns/vfwcvt_xu_f_v.h @@ -0,0 +1,23 @@ +// vfwcvt.xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + ; +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, STATE.frm, true); +}, +{ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, STATE.frm, true); +}, +{ + ; +}, +{ + require(p->supports_extension(EXT_ZFH)); +}, +{ + require(p->supports_extension('F')); +}, +true, (P.VU.vsew >= 16)) diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h new file mode 100644 index 0000000000..441fa0a791 --- /dev/null +++ b/riscv/insns/vfwmacc_vf.h @@ -0,0 +1,8 @@ +// vfwmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ + vd = f64_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h new file mode 100644 index 0000000000..a654198bfd --- /dev/null +++ 
b/riscv/insns/vfwmacc_vv.h @@ -0,0 +1,8 @@ +// vfwmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ + vd = f64_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h new file mode 100644 index 0000000000..18010ff490 --- /dev/null +++ b/riscv/insns/vfwmsac_vf.h @@ -0,0 +1,8 @@ +// vfwmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h new file mode 100644 index 0000000000..9dc4073fef --- /dev/null +++ b/riscv/insns/vfwmsac_vv.h @@ -0,0 +1,8 @@ +// vfwmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h new file mode 100644 index 0000000000..2bb543f63a --- /dev/null +++ b/riscv/insns/vfwmul_vf.h @@ -0,0 +1,8 @@ +// vfwmul.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mul(vs2, rs1); +}, +{ + vd = f64_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h new file mode 100644 index 0000000000..2ce38e62c1 --- /dev/null +++ b/riscv/insns/vfwmul_vv.h @@ -0,0 +1,8 @@ +// vfwmul.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mul(vs2, vs1); +}, +{ + vd = f64_mul(vs2, vs1); +}) diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h new file mode 100644 index 0000000000..038bda08ca --- /dev/null +++ b/riscv/insns/vfwnmacc_vf.h @@ -0,0 +1,8 @@ +// vfwnmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h new file mode 100644 index 0000000000..bf863e04c2 --- /dev/null +++ b/riscv/insns/vfwnmacc_vv.h @@ -0,0 +1,8 @@ +// 
vfwnmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h new file mode 100644 index 0000000000..1e288e1b91 --- /dev/null +++ b/riscv/insns/vfwnmsac_vf.h @@ -0,0 +1,8 @@ +// vfwnmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h new file mode 100644 index 0000000000..ce97749e1c --- /dev/null +++ b/riscv/insns/vfwnmsac_vv.h @@ -0,0 +1,8 @@ +// vfwnmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h new file mode 100644 index 0000000000..1f42d8ff2f --- /dev/null +++ b/riscv/insns/vfwredosum_vs.h @@ -0,0 +1,9 @@ +// vfwredosum.vs vd, vs2, vs1 +bool is_propagate = false; +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h new file mode 100644 index 0000000000..4ef28969dc --- /dev/null +++ b/riscv/insns/vfwredsum_vs.h @@ -0,0 +1,9 @@ +// vfwredsum.vs vd, vs2, vs1 +bool is_propagate = true; +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h new file mode 100644 index 0000000000..8c37688419 --- /dev/null +++ b/riscv/insns/vfwsub_vf.h @@ -0,0 +1,8 @@ +// vfwsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h new file mode 100644 index 0000000000..ce08e36af7 --- /dev/null 
+++ b/riscv/insns/vfwsub_vv.h @@ -0,0 +1,8 @@ +// vfwsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f32_sub(vs2, vs1); +}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h new file mode 100644 index 0000000000..f6f47ca5cf --- /dev/null +++ b/riscv/insns/vfwsub_wf.h @@ -0,0 +1,8 @@ +// vfwsub.wf vd, vs2, rs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h new file mode 100644 index 0000000000..eef904dcc2 --- /dev/null +++ b/riscv/insns/vfwsub_wv.h @@ -0,0 +1,8 @@ +// vfwsub.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f32_sub(vs2, vs1); +}, +{ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h new file mode 100644 index 0000000000..012d124a43 --- /dev/null +++ b/riscv/insns/vid_v.h @@ -0,0 +1,31 @@ +// vid.v vd, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +require_align(rd_num, P.VU.vflmul); +require_vm; + +for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { + VI_LOOP_ELEMENT_SKIP(); + + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = i; + break; + case e16: + P.VU.elt(rd_num, i, true) = i; + break; + case e32: + P.VU.elt(rd_num, i, true) = i; + break; + default: + P.VU.elt(rd_num, i, true) = i; + break; + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h new file mode 100644 index 0000000000..a4368254e3 --- /dev/null +++ b/riscv/insns/viota_m.h @@ -0,0 +1,53 @@ +// viota.m vd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +require_vm; +require_align(rd_num, P.VU.vflmul); 
+require_noover(rd_num, P.VU.vflmul, rs2_num, 1); + +int cnt = 0; +for (reg_t i = 0; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + bool has_one = false; + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + if (vs2_lsb) { + has_one = true; + } + } + + bool use_ori = (insn.v_vm() == 0) && !do_mask; + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e16: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + case e32: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + default: + P.VU.elt(rd_num, i, true) = use_ori ? + P.VU.elt(rd_num, i) : cnt; + break; + } + + if (has_one) { + cnt++; + } +} + diff --git a/riscv/insns/vl1re16_v.h b/riscv/insns/vl1re16_v.h new file mode 100644 index 0000000000..220e83e6bb --- /dev/null +++ b/riscv/insns/vl1re16_v.h @@ -0,0 +1,2 @@ +// vl1re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl1re32_v.h b/riscv/insns/vl1re32_v.h new file mode 100644 index 0000000000..e72ca02a3d --- /dev/null +++ b/riscv/insns/vl1re32_v.h @@ -0,0 +1,2 @@ +// vl1re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl1re64_v.h b/riscv/insns/vl1re64_v.h new file mode 100644 index 0000000000..265701a06f --- /dev/null +++ b/riscv/insns/vl1re64_v.h @@ -0,0 +1,2 @@ +// vl1re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl1re8_v.h b/riscv/insns/vl1re8_v.h new file mode 100644 index 0000000000..b4ce661688 --- /dev/null +++ b/riscv/insns/vl1re8_v.h @@ -0,0 +1,2 @@ +// vl1re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl2re16_v.h b/riscv/insns/vl2re16_v.h new file mode 100644 index 0000000000..2846edd980 --- /dev/null +++ b/riscv/insns/vl2re16_v.h @@ -0,0 +1,2 @@ +// vl2e16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl2re32_v.h 
b/riscv/insns/vl2re32_v.h new file mode 100644 index 0000000000..5cea835524 --- /dev/null +++ b/riscv/insns/vl2re32_v.h @@ -0,0 +1,2 @@ +// vl2re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl2re64_v.h b/riscv/insns/vl2re64_v.h new file mode 100644 index 0000000000..efdf2ce2ac --- /dev/null +++ b/riscv/insns/vl2re64_v.h @@ -0,0 +1,2 @@ +// vl2re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl2re8_v.h b/riscv/insns/vl2re8_v.h new file mode 100644 index 0000000000..fcc3c4c057 --- /dev/null +++ b/riscv/insns/vl2re8_v.h @@ -0,0 +1,2 @@ +// vl2re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl4re16_v.h b/riscv/insns/vl4re16_v.h new file mode 100644 index 0000000000..0363418319 --- /dev/null +++ b/riscv/insns/vl4re16_v.h @@ -0,0 +1,2 @@ +// vl4re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl4re32_v.h b/riscv/insns/vl4re32_v.h new file mode 100644 index 0000000000..e37cc1ab7b --- /dev/null +++ b/riscv/insns/vl4re32_v.h @@ -0,0 +1,2 @@ +// vl4re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl4re64_v.h b/riscv/insns/vl4re64_v.h new file mode 100644 index 0000000000..11486f5d1e --- /dev/null +++ b/riscv/insns/vl4re64_v.h @@ -0,0 +1,2 @@ +// vl4re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl4re8_v.h b/riscv/insns/vl4re8_v.h new file mode 100644 index 0000000000..f9ce3ff7c7 --- /dev/null +++ b/riscv/insns/vl4re8_v.h @@ -0,0 +1,2 @@ +// vl4re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vl8re16_v.h b/riscv/insns/vl8re16_v.h new file mode 100644 index 0000000000..0b3f1413ba --- /dev/null +++ b/riscv/insns/vl8re16_v.h @@ -0,0 +1,2 @@ +// vl8re16.v vd, (rs1) +VI_LD_WHOLE(uint16); diff --git a/riscv/insns/vl8re32_v.h b/riscv/insns/vl8re32_v.h new file mode 100644 index 0000000000..3372b89d05 --- /dev/null +++ b/riscv/insns/vl8re32_v.h @@ -0,0 +1,2 @@ +// vl8re32.v vd, (rs1) +VI_LD_WHOLE(uint32); diff --git a/riscv/insns/vl8re64_v.h b/riscv/insns/vl8re64_v.h new file 
mode 100644 index 0000000000..f9a9ca981f --- /dev/null +++ b/riscv/insns/vl8re64_v.h @@ -0,0 +1,2 @@ +// vl8re64.v vd, (rs1) +VI_LD_WHOLE(uint64); diff --git a/riscv/insns/vl8re8_v.h b/riscv/insns/vl8re8_v.h new file mode 100644 index 0000000000..ee05e81a9d --- /dev/null +++ b/riscv/insns/vl8re8_v.h @@ -0,0 +1,2 @@ +// vl8re8.v vd, (rs1) +VI_LD_WHOLE(uint8); diff --git a/riscv/insns/vle16_v.h b/riscv/insns/vle16_v.h new file mode 100644 index 0000000000..7bd2e837af --- /dev/null +++ b/riscv/insns/vle16_v.h @@ -0,0 +1,2 @@ +// vle16.v and vlseg[2-8]e16.v +VI_LD(0, (i * nf + fn), int16); diff --git a/riscv/insns/vle16ff_v.h b/riscv/insns/vle16ff_v.h new file mode 100644 index 0000000000..53c8889137 --- /dev/null +++ b/riscv/insns/vle16ff_v.h @@ -0,0 +1,2 @@ +// vle16ff.v and vlseg[2-8]e16ff.v +VI_LDST_FF(int16); diff --git a/riscv/insns/vle32_v.h b/riscv/insns/vle32_v.h new file mode 100644 index 0000000000..9399fd621f --- /dev/null +++ b/riscv/insns/vle32_v.h @@ -0,0 +1,2 @@ +// vle32.v and vlseg[2-8]e32.v +VI_LD(0, (i * nf + fn), int32); diff --git a/riscv/insns/vle32ff_v.h b/riscv/insns/vle32ff_v.h new file mode 100644 index 0000000000..7d03d7ddd5 --- /dev/null +++ b/riscv/insns/vle32ff_v.h @@ -0,0 +1,2 @@ +// vle32ff.v and vlseg[2-8]e32ff.v +VI_LDST_FF(int32); diff --git a/riscv/insns/vle64_v.h b/riscv/insns/vle64_v.h new file mode 100644 index 0000000000..3f2654dd8b --- /dev/null +++ b/riscv/insns/vle64_v.h @@ -0,0 +1,2 @@ +// vle64.v and vlseg[2-8]e64.v +VI_LD(0, (i * nf + fn), int64); diff --git a/riscv/insns/vle64ff_v.h b/riscv/insns/vle64ff_v.h new file mode 100644 index 0000000000..39996da6f1 --- /dev/null +++ b/riscv/insns/vle64ff_v.h @@ -0,0 +1,2 @@ +// vle64ff.v and vlseg[2-8]e64ff.v +VI_LDST_FF(int64); diff --git a/riscv/insns/vle8_v.h b/riscv/insns/vle8_v.h new file mode 100644 index 0000000000..5613a1dd3e --- /dev/null +++ b/riscv/insns/vle8_v.h @@ -0,0 +1,2 @@ +// vle8.v and vlseg[2-8]e8.v +VI_LD(0, (i * nf + fn), int8); diff --git 
a/riscv/insns/vle8ff_v.h b/riscv/insns/vle8ff_v.h new file mode 100644 index 0000000000..b56d1d339c --- /dev/null +++ b/riscv/insns/vle8ff_v.h @@ -0,0 +1,2 @@ +// vle8ff.v and vlseg[2-8]e8ff.v +VI_LDST_FF(int8); diff --git a/riscv/insns/vlse16_v.h b/riscv/insns/vlse16_v.h new file mode 100644 index 0000000000..7622ded97d --- /dev/null +++ b/riscv/insns/vlse16_v.h @@ -0,0 +1,2 @@ +// vlse16.v and vlsseg[2-8]e16.v +VI_LD(i * RS2, fn, int16); diff --git a/riscv/insns/vlse32_v.h b/riscv/insns/vlse32_v.h new file mode 100644 index 0000000000..1afc5e9cf4 --- /dev/null +++ b/riscv/insns/vlse32_v.h @@ -0,0 +1,2 @@ +// vlse32.v and vlsseg[2-8]e32.v +VI_LD(i * RS2, fn, int32); diff --git a/riscv/insns/vlse64_v.h b/riscv/insns/vlse64_v.h new file mode 100644 index 0000000000..c6d999955e --- /dev/null +++ b/riscv/insns/vlse64_v.h @@ -0,0 +1,2 @@ +// vlse64.v and vlsseg[2-8]e64.v +VI_LD(i * RS2, fn, int64); diff --git a/riscv/insns/vlse8_v.h b/riscv/insns/vlse8_v.h new file mode 100644 index 0000000000..021a1fbcb4 --- /dev/null +++ b/riscv/insns/vlse8_v.h @@ -0,0 +1,2 @@ +// vlse8.v and vlsseg[2-8]e8.v +VI_LD(i * RS2, fn, int8); diff --git a/riscv/insns/vlxei16_v.h b/riscv/insns/vlxei16_v.h new file mode 100644 index 0000000000..6e4ed49b57 --- /dev/null +++ b/riscv/insns/vlxei16_v.h @@ -0,0 +1,2 @@ +// vlxei16.v and vlxseg[2-8]e16.v +VI_LD_INDEX(e16, true); diff --git a/riscv/insns/vlxei32_v.h b/riscv/insns/vlxei32_v.h new file mode 100644 index 0000000000..a7da8ff035 --- /dev/null +++ b/riscv/insns/vlxei32_v.h @@ -0,0 +1,2 @@ +// vlxe32.v and vlxseg[2-8]ei32.v +VI_LD_INDEX(e32, true); diff --git a/riscv/insns/vlxei64_v.h b/riscv/insns/vlxei64_v.h new file mode 100644 index 0000000000..067224e4c5 --- /dev/null +++ b/riscv/insns/vlxei64_v.h @@ -0,0 +1,3 @@ +// vlxei64.v and vlxseg[2-8]ei64.v +VI_LD_INDEX(e64, true); + diff --git a/riscv/insns/vlxei8_v.h b/riscv/insns/vlxei8_v.h new file mode 100644 index 0000000000..d27304996b --- /dev/null +++ b/riscv/insns/vlxei8_v.h @@ -0,0 
+1,2 @@ +// vlxei8.v and vlxseg[2-8]ei8.v +VI_LD_INDEX(e8, true); diff --git a/riscv/insns/vmacc_vv.h b/riscv/insns/vmacc_vv.h new file mode 100644 index 0000000000..e6ec93ff71 --- /dev/null +++ b/riscv/insns/vmacc_vv.h @@ -0,0 +1,5 @@ +// vmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i] +VI_VV_LOOP +({ + vd = vs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmacc_vx.h b/riscv/insns/vmacc_vx.h new file mode 100644 index 0000000000..d40b264a05 --- /dev/null +++ b/riscv/insns/vmacc_vx.h @@ -0,0 +1,5 @@ +// vmacc.vx: vd[i] = +(x[rs1] * vs2[i]) + vd[i] +VI_VX_LOOP +({ + vd = rs1 * vs2 + vd; +}) diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h new file mode 100644 index 0000000000..afdca7e3c5 --- /dev/null +++ b/riscv/insns/vmadc_vim.h @@ -0,0 +1,13 @@ +// vmadc.vim vd, vs2, simm5 +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h new file mode 100644 index 0000000000..a5d54c6dff --- /dev/null +++ b/riscv/insns/vmadc_vvm.h @@ -0,0 +1,13 @@ +// vmadc.vvm vd, vs2, rs1 +VI_VV_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? 
(v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h new file mode 100644 index 0000000000..ca0342e0ce --- /dev/null +++ b/riscv/insns/vmadc_vxm.h @@ -0,0 +1,13 @@ +// vmadc.vxm vd, vs2, rs1 # vd mask bit = carry-out of vs2 + rs1 + carry-in +VI_XI_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmadd_vv.h b/riscv/insns/vmadd_vv.h new file mode 100644 index 0000000000..a1c0d2ed64 --- /dev/null +++ b/riscv/insns/vmadd_vv.h @@ -0,0 +1,5 @@ +// vmadd.vv: vd[i] = (vd[i] * vs1[i]) + vs2[i] +VI_VV_LOOP +({ + vd = vd * vs1 + vs2; +}) diff --git a/riscv/insns/vmadd_vx.h b/riscv/insns/vmadd_vx.h new file mode 100644 index 0000000000..1a8a001593 --- /dev/null +++ b/riscv/insns/vmadd_vx.h @@ -0,0 +1,5 @@ +// vmadd.vx: vd[i] = (vd[i] * x[rs1]) + vs2[i] +VI_VX_LOOP +({ + vd = vd * rs1 + vs2; +}) diff --git a/riscv/insns/vmand_mm.h b/riscv/insns/vmand_mm.h new file mode 100644 index 0000000000..04615c60fc --- /dev/null +++ b/riscv/insns/vmand_mm.h @@ -0,0 +1,2 @@ +// vmand.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & vs1); diff --git a/riscv/insns/vmandnot_mm.h b/riscv/insns/vmandnot_mm.h new file mode 100644 index 0000000000..4c26469c7e --- /dev/null +++ b/riscv/insns/vmandnot_mm.h @@ -0,0 +1,2 @@ +// vmandnot.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 & ~vs1); diff --git a/riscv/insns/vmax_vv.h b/riscv/insns/vmax_vv.h new file mode 100644 index 0000000000..b9f15c5f18 --- /dev/null +++ b/riscv/insns/vmax_vv.h @@ -0,0 +1,10 @@ +// vmax.vv vd, vs2, vs1, vm # vector-vector +VI_VV_LOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2;
+ } + +}) diff --git a/riscv/insns/vmax_vx.h b/riscv/insns/vmax_vx.h new file mode 100644 index 0000000000..06f3f43160 --- /dev/null +++ b/riscv/insns/vmax_vx.h @@ -0,0 +1,10 @@ +// vmax.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_LOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmaxu_vv.h b/riscv/insns/vmaxu_vv.h new file mode 100644 index 0000000000..4e6868d19e --- /dev/null +++ b/riscv/insns/vmaxu_vv.h @@ -0,0 +1,9 @@ +// vmaxu.vv vd, vs2, vs1, vm # vector-vector +VI_VV_ULOOP +({ + if (vs1 >= vs2) { + vd = vs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmaxu_vx.h b/riscv/insns/vmaxu_vx.h new file mode 100644 index 0000000000..cab89188f7 --- /dev/null +++ b/riscv/insns/vmaxu_vx.h @@ -0,0 +1,9 @@ +// vmaxu.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_ULOOP +({ + if (rs1 >= vs2) { + vd = rs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h new file mode 100644 index 0000000000..fd6ae1cdc4 --- /dev/null +++ b/riscv/insns/vmerge_vim.h @@ -0,0 +1,11 @@ +// vmerge.vim vd, vs2, simm5, v0 # vd[i] = v0 mask bit i ? simm5 : vs2[i] +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? simm5 : vs2; +}) diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h new file mode 100644 index 0000000000..df416b2c78 --- /dev/null +++ b/riscv/insns/vmerge_vvm.h @@ -0,0 +1,11 @@ +// vmerge.vvm vd, vs2, vs1, v0 # vd[i] = v0 mask bit i ? vs1[i] : vs2[i] +require_vector(true); +VI_CHECK_SSS(true); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ?
vs1 : vs2; +}) diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h new file mode 100644 index 0000000000..122a7b733e --- /dev/null +++ b/riscv/insns/vmerge_vxm.h @@ -0,0 +1,11 @@ +// vmerge.vxm vd, vs2, rs1, v0 # vd[i] = v0 mask bit i ? rs1 : vs2[i] +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + int midx = i / 64; + int mpos = i % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; +}) diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h new file mode 100644 index 0000000000..040f2b0b9d --- /dev/null +++ b/riscv/insns/vmfeq_vf.h @@ -0,0 +1,12 @@ +// vmfeq.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_eq(vs2, rs1); +}, +{ + res = f32_eq(vs2, rs1); +}, +{ + res = f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h new file mode 100644 index 0000000000..fb24d1329c --- /dev/null +++ b/riscv/insns/vmfeq_vv.h @@ -0,0 +1,12 @@ +// vmfeq.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = f16_eq(vs2, vs1); +}, +{ + res = f32_eq(vs2, vs1); +}, +{ + res = f64_eq(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h new file mode 100644 index 0000000000..9e69855b51 --- /dev/null +++ b/riscv/insns/vmfge_vf.h @@ -0,0 +1,12 @@ +// vmfge.vf vd, vs2, rs1 # vs2 >= rs1, evaluated as rs1 <= vs2 +VI_VFP_LOOP_CMP +({ + res = f16_le(rs1, vs2); +}, +{ + res = f32_le(rs1, vs2); +}, +{ + res = f64_le(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h new file mode 100644 index 0000000000..bd5d99b70d --- /dev/null +++ b/riscv/insns/vmfgt_vf.h @@ -0,0 +1,12 @@ +// vmfgt.vf vd, vs2, rs1 # vs2 > rs1, evaluated as rs1 < vs2 +VI_VFP_LOOP_CMP +({ + res = f16_lt(rs1, vs2); +}, +{ + res = f32_lt(rs1, vs2); +}, +{ + res = f64_lt(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h new file mode 100644 index 0000000000..3d2852fca9 --- /dev/null +++ b/riscv/insns/vmfle_vf.h @@ -0,0 +1,12 @@ +// vmfle.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_le(vs2, rs1); +}, +{ + res = f32_le(vs2, rs1); +}, +{ + res =
f64_le(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h new file mode 100644 index 0000000000..203ef210ff --- /dev/null +++ b/riscv/insns/vmfle_vv.h @@ -0,0 +1,12 @@ +// vmfle.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = f16_le(vs2, vs1); +}, +{ + res = f32_le(vs2, vs1); +}, +{ + res = f64_le(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h new file mode 100644 index 0000000000..4780adc556 --- /dev/null +++ b/riscv/insns/vmflt_vf.h @@ -0,0 +1,12 @@ +// vmflt.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = f16_lt(vs2, rs1); +}, +{ + res = f32_lt(vs2, rs1); +}, +{ + res = f64_lt(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h new file mode 100644 index 0000000000..cdfc3fae90 --- /dev/null +++ b/riscv/insns/vmflt_vv.h @@ -0,0 +1,12 @@ +// vmflt.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = f16_lt(vs2, vs1); +}, +{ + res = f32_lt(vs2, vs1); +}, +{ + res = f64_lt(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h new file mode 100644 index 0000000000..84016993ae --- /dev/null +++ b/riscv/insns/vmfne_vf.h @@ -0,0 +1,12 @@ +// vmfne.vf vd, vs2, rs1 +VI_VFP_LOOP_CMP +({ + res = !f16_eq(vs2, rs1); +}, +{ + res = !f32_eq(vs2, rs1); +}, +{ + res = !f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h new file mode 100644 index 0000000000..50dfa9c047 --- /dev/null +++ b/riscv/insns/vmfne_vv.h @@ -0,0 +1,12 @@ +// vmfne.vv vd, vs2, vs1 +VI_VFP_LOOP_CMP +({ + res = !f16_eq(vs2, vs1); +}, +{ + res = !f32_eq(vs2, vs1); +}, +{ + res = !f64_eq(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmin_vv.h b/riscv/insns/vmin_vv.h new file mode 100644 index 0000000000..21da0b3c5b --- /dev/null +++ b/riscv/insns/vmin_vv.h @@ -0,0 +1,11 @@ +// vmin.vv vd, vs2, vs1, vm # vector-vector +VI_VV_LOOP +({ + if (vs1 <= vs2) { + vd = vs1; + } else { + vd = vs2; + } + + +}) diff --git a/riscv/insns/vmin_vx.h b/riscv/insns/vmin_vx.h
new file mode 100644 index 0000000000..3291776d05 --- /dev/null +++ b/riscv/insns/vmin_vx.h @@ -0,0 +1,11 @@ +// vmin.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_LOOP +({ + if (rs1 <= vs2) { + vd = rs1; + } else { + vd = vs2; + } + + +}) diff --git a/riscv/insns/vminu_vv.h b/riscv/insns/vminu_vv.h new file mode 100644 index 0000000000..c0ab1958d0 --- /dev/null +++ b/riscv/insns/vminu_vv.h @@ -0,0 +1,9 @@ +// vminu.vv vd, vs2, vs1, vm # vector-vector +VI_VV_ULOOP +({ + if (vs1 <= vs2) { + vd = vs1; + } else { + vd = vs2; + } +}) diff --git a/riscv/insns/vminu_vx.h b/riscv/insns/vminu_vx.h new file mode 100644 index 0000000000..1055895ac3 --- /dev/null +++ b/riscv/insns/vminu_vx.h @@ -0,0 +1,10 @@ +// vminu.vx vd, vs2, rs1, vm # vector-scalar +VI_VX_ULOOP +({ + if (rs1 <= vs2) { + vd = rs1; + } else { + vd = vs2; + } + +}) diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h new file mode 100644 index 0000000000..5a3ab090ae --- /dev/null +++ b/riscv/insns/vmnand_mm.h @@ -0,0 +1,2 @@ +// vmnand.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 & vs1)); diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h new file mode 100644 index 0000000000..ab933786c9 --- /dev/null +++ b/riscv/insns/vmnor_mm.h @@ -0,0 +1,2 @@ +// vmnor.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 | vs1)); diff --git a/riscv/insns/vmor_mm.h b/riscv/insns/vmor_mm.h new file mode 100644 index 0000000000..32e71b934a --- /dev/null +++ b/riscv/insns/vmor_mm.h @@ -0,0 +1,2 @@ +// vmor.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 | vs1); diff --git a/riscv/insns/vmornot_mm.h b/riscv/insns/vmornot_mm.h new file mode 100644 index 0000000000..bdc1d8b6e7 --- /dev/null +++ b/riscv/insns/vmornot_mm.h @@ -0,0 +1,2 @@ +// vmornot.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 | ~vs1); diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h new file mode 100644 index 0000000000..ff95464d05 --- /dev/null +++ b/riscv/insns/vmsbc_vvm.h @@ -0,0 +1,13 @@ +// vmsbc.vvm vd, vs2, vs1 # vd mask bit = borrow-out of vs2 - vs1 - borrow-in +VI_VV_LOOP_CARRY +({ + auto v0 = P.VU.elt(0, midx); + const
uint64_t mmask = UINT64_C(1) << mpos; + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs2) - (op_mask & vs1) - carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h new file mode 100644 index 0000000000..29fa012af9 --- /dev/null +++ b/riscv/insns/vmsbc_vxm.h @@ -0,0 +1,13 @@ +// vmsbc.vxm vd, vs2, rs1 # vd mask bit = borrow-out of vs2 - rs1 - borrow-in +VI_XI_LOOP_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint64_t mmask = UINT64_C(1) << mpos; \ + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; + + uint128_t res = (op_mask & vs2) - (op_mask & rs1) - carry; + + carry = (res >> sew) & 0x1u; + vd = (vd & ~mmask) | ((carry << mpos) & mmask); +}) diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h new file mode 100644 index 0000000000..a4195cfd08 --- /dev/null +++ b/riscv/insns/vmsbf_m.h @@ -0,0 +1,32 @@ +// vmsbf.m vd, vs2, vm # set-before-first: vd bit is 1 only ahead of the first set vs2 bit +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + auto &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmseq_vi.h b/riscv/insns/vmseq_vi.h new file mode 100644 index 0000000000..cfc16825f2 --- /dev/null +++
b/riscv/insns/vmseq_vi.h @@ -0,0 +1,5 @@ +// vmseq.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = simm5 == vs2; +}) diff --git a/riscv/insns/vmseq_vv.h b/riscv/insns/vmseq_vv.h new file mode 100644 index 0000000000..91fd204a50 --- /dev/null +++ b/riscv/insns/vmseq_vv.h @@ -0,0 +1,6 @@ +// vmseq.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 == vs1; +}) + diff --git a/riscv/insns/vmseq_vx.h b/riscv/insns/vmseq_vx.h new file mode 100644 index 0000000000..ab63323134 --- /dev/null +++ b/riscv/insns/vmseq_vx.h @@ -0,0 +1,5 @@ +// vmseq.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = rs1 == vs2; +}) diff --git a/riscv/insns/vmsgt_vi.h b/riscv/insns/vmsgt_vi.h new file mode 100644 index 0000000000..4f7dea8e4b --- /dev/null +++ b/riscv/insns/vmsgt_vi.h @@ -0,0 +1,5 @@ +// vmsgt.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 > simm5; +}) diff --git a/riscv/insns/vmsgt_vx.h b/riscv/insns/vmsgt_vx.h new file mode 100644 index 0000000000..5f24db6964 --- /dev/null +++ b/riscv/insns/vmsgt_vx.h @@ -0,0 +1,5 @@ +// vmsgt.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 > rs1; +}) diff --git a/riscv/insns/vmsgtu_vi.h b/riscv/insns/vmsgtu_vi.h new file mode 100644 index 0000000000..be28fee1e7 --- /dev/null +++ b/riscv/insns/vmsgtu_vi.h @@ -0,0 +1,5 @@ +// vmsgtu.vi vd, vs2, simm5 +VI_VI_ULOOP_CMP +({ + res = vs2 > (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); +}) diff --git a/riscv/insns/vmsgtu_vx.h b/riscv/insns/vmsgtu_vx.h new file mode 100644 index 0000000000..7f39800804 --- /dev/null +++ b/riscv/insns/vmsgtu_vx.h @@ -0,0 +1,5 @@ +// vmsgtu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 > rs1; +}) diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h new file mode 100644 index 0000000000..a16ef681c4 --- /dev/null +++ b/riscv/insns/vmsif_m.h @@ -0,0 +1,32 @@ +// vmsif.m vd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart ; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + auto &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmsle_vi.h b/riscv/insns/vmsle_vi.h new file mode 100644 index 0000000000..f0f67d0213 --- /dev/null +++ b/riscv/insns/vmsle_vi.h @@ -0,0 +1,5 @@ +// vmsle.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 <= simm5; +}) diff --git a/riscv/insns/vmsle_vv.h b/riscv/insns/vmsle_vv.h new file mode 100644 index 0000000000..30aba06d41 --- /dev/null +++ b/riscv/insns/vmsle_vv.h @@ -0,0 +1,5 @@ +// vmsle.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 <= vs1; +}) diff --git a/riscv/insns/vmsle_vx.h b/riscv/insns/vmsle_vx.h new file mode 100644 index 0000000000..c26d59692e --- /dev/null +++ b/riscv/insns/vmsle_vx.h @@ -0,0 +1,5 @@ +// vmsle.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 <= rs1; +}) diff --git a/riscv/insns/vmsleu_vi.h b/riscv/insns/vmsleu_vi.h new file mode 100644 index 0000000000..0e66b781a0 --- /dev/null +++ b/riscv/insns/vmsleu_vi.h @@ -0,0 +1,5 @@ +// vmsleu.vi vd, vs2, simm5 +VI_VI_ULOOP_CMP +({ + res = vs2 <= (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); +}) diff --git a/riscv/insns/vmsleu_vv.h b/riscv/insns/vmsleu_vv.h new file mode 100644 index 0000000000..0e460326f8 --- /dev/null +++ b/riscv/insns/vmsleu_vv.h @@ -0,0 +1,5 @@ +// vmsleu.vv vd, vs2, vs1 +VI_VV_ULOOP_CMP +({ + res = vs2 <= vs1; +}) diff --git a/riscv/insns/vmsleu_vx.h b/riscv/insns/vmsleu_vx.h new file mode 100644 index 0000000000..935b17681c --- /dev/null +++
b/riscv/insns/vmsleu_vx.h @@ -0,0 +1,5 @@ +// vmsleu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 <= rs1; +}) diff --git a/riscv/insns/vmslt_vv.h b/riscv/insns/vmslt_vv.h new file mode 100644 index 0000000000..71e6f87f1f --- /dev/null +++ b/riscv/insns/vmslt_vv.h @@ -0,0 +1,5 @@ +// vmslt.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 < vs1; +}) diff --git a/riscv/insns/vmslt_vx.h b/riscv/insns/vmslt_vx.h new file mode 100644 index 0000000000..b32bb14537 --- /dev/null +++ b/riscv/insns/vmslt_vx.h @@ -0,0 +1,5 @@ +// vmslt.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 < rs1; +}) diff --git a/riscv/insns/vmsltu_vv.h b/riscv/insns/vmsltu_vv.h new file mode 100644 index 0000000000..53a570ae9d --- /dev/null +++ b/riscv/insns/vmsltu_vv.h @@ -0,0 +1,5 @@ +// vmsltu.vv vd, vs2, vs1 +VI_VV_ULOOP_CMP +({ + res = vs2 < vs1; +}) diff --git a/riscv/insns/vmsltu_vx.h b/riscv/insns/vmsltu_vx.h new file mode 100644 index 0000000000..8082544876 --- /dev/null +++ b/riscv/insns/vmsltu_vx.h @@ -0,0 +1,5 @@ +// vmsltu.vx vd, vs2, rs1 +VI_VX_ULOOP_CMP +({ + res = vs2 < rs1; +}) diff --git a/riscv/insns/vmsne_vi.h b/riscv/insns/vmsne_vi.h new file mode 100644 index 0000000000..5e9758ef94 --- /dev/null +++ b/riscv/insns/vmsne_vi.h @@ -0,0 +1,5 @@ +// vmsne.vi vd, vs2, simm5 +VI_VI_LOOP_CMP +({ + res = vs2 != simm5; +}) diff --git a/riscv/insns/vmsne_vv.h b/riscv/insns/vmsne_vv.h new file mode 100644 index 0000000000..e6a7174a48 --- /dev/null +++ b/riscv/insns/vmsne_vv.h @@ -0,0 +1,5 @@ +// vmsne.vv vd, vs2, vs1 +VI_VV_LOOP_CMP +({ + res = vs2 != vs1; +}) diff --git a/riscv/insns/vmsne_vx.h b/riscv/insns/vmsne_vx.h new file mode 100644 index 0000000000..9e4c155387 --- /dev/null +++ b/riscv/insns/vmsne_vx.h @@ -0,0 +1,5 @@ +// vmsne.vx vd, vs2, rs1 +VI_VX_LOOP_CMP +({ + res = vs2 != rs1; +}) diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h new file mode 100644 index 0000000000..5ef0bfd4ac --- /dev/null +++ b/riscv/insns/vmsof_m.h @@ -0,0 +1,30 @@ +// vmsof.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +require(P.VU.vstart == 0); +require_vm; +require(insn.rd() != insn.rs2()); + +reg_t vl = P.VU.vl; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); + +bool has_one = false; +for (reg_t i = P.VU.vstart ; i < vl; ++i) { + const int midx = i / 64; + const int mpos = i % 64; + const uint64_t mmask = UINT64_C(1) << mpos; \ + + bool vs2_lsb = ((P.VU.elt(rs2_num, midx ) >> mpos) & 0x1) == 1; + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + + if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { + uint64_t &vd = P.VU.elt(rd_num, midx, true); + uint64_t res = 0; + if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + vd = (vd & ~mmask) | ((res << mpos) & mmask); + } +} diff --git a/riscv/insns/vmul_vv.h b/riscv/insns/vmul_vv.h new file mode 100644 index 0000000000..a3278171dd --- /dev/null +++ b/riscv/insns/vmul_vv.h @@ -0,0 +1,5 @@ +// vmul.vv vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs2 * vs1; +}) diff --git a/riscv/insns/vmul_vx.h b/riscv/insns/vmul_vx.h new file mode 100644 index 0000000000..8d68390276 --- /dev/null +++ b/riscv/insns/vmul_vx.h @@ -0,0 +1,5 @@ +// vmul.vx vd, vs2, rs1 +VI_VX_LOOP +({ + vd = vs2 * rs1; +}) diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h new file mode 100644 index 0000000000..e861a3397a --- /dev/null +++ b/riscv/insns/vmulh_vv.h @@ -0,0 +1,5 @@ +// vmulh.vv vd, vs2, vs1 # vd = (vs2 * vs1) >> sew +VI_VV_LOOP +({ + vd = ((int128_t)vs2 * vs1) >> sew; +}) diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h new file mode 100644 index 0000000000..b6b5503674 --- /dev/null +++ b/riscv/insns/vmulh_vx.h @@ -0,0 +1,5 @@ +// vmulh.vx vd, vs2, rs1 # vd = (vs2 * rs1) >> sew +VI_VX_LOOP +({ + vd = ((int128_t)vs2 * rs1) >> sew; +}) diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h new file mode 100644 index 0000000000..f77a7d3f21 --- /dev/null +++ b/riscv/insns/vmulhsu_vv.h @@ -0,0 +1,38 @@ +// vmulhsu.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +switch(sew) { +case e8: { + auto &vd =
P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew; + break; +} +case e16: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew; + break; +} +case e32: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew; + break; +} +default: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + auto vs1 = P.VU.elt(rs1_num, i); + + vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; + break; +} +} +VI_LOOP_END diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h new file mode 100644 index 0000000000..b0699f6f93 --- /dev/null +++ b/riscv/insns/vmulhsu_vx.h @@ -0,0 +1,38 @@ +// vmulhsu.vx vd, vs2, rs1 # vd = (vs2 cast signed * rs1 cast unsigned) >> sew +VI_CHECK_SSS(false); +VI_LOOP_BASE +switch(sew) { +case e8: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint8_t rs1 = RS1; + + vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew; + break; +} +case e16: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint16_t rs1 = RS1; + + vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew; + break; +} +case e32: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint32_t rs1 = RS1; + + vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew; + break; +} +default: { + auto &vd = P.VU.elt(rd_num, i, true); + auto vs2 = P.VU.elt(rs2_num, i); + uint64_t rs1 = RS1; + + vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; + break; +} +} +VI_LOOP_END diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h new file mode 100644 index 0000000000..8e318edb75 --- /dev/null +++ b/riscv/insns/vmulhu_vv.h @@ -0,0 +1,5 @@ +// vmulhu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + vd = ((uint128_t)vs2 * vs1) >> sew; +}) diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h new
file mode 100644 index 0000000000..672ad32df2 --- /dev/null +++ b/riscv/insns/vmulhu_vx.h @@ -0,0 +1,5 @@ +// vmulhu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + vd = ((uint128_t)vs2 * rs1) >> sew; +}) diff --git a/riscv/insns/vmv1r_v.h b/riscv/insns/vmv1r_v.h new file mode 100644 index 0000000000..bbdeab9a1d --- /dev/null +++ b/riscv/insns/vmv1r_v.h @@ -0,0 +1,2 @@ +// vmv1r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv2r_v.h b/riscv/insns/vmv2r_v.h new file mode 100644 index 0000000000..1ac8e09eb0 --- /dev/null +++ b/riscv/insns/vmv2r_v.h @@ -0,0 +1,2 @@ +// vmv2r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv4r_v.h b/riscv/insns/vmv4r_v.h new file mode 100644 index 0000000000..2068731a9e --- /dev/null +++ b/riscv/insns/vmv4r_v.h @@ -0,0 +1,2 @@ +// vmv4r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv8r_v.h b/riscv/insns/vmv8r_v.h new file mode 100644 index 0000000000..2b205fc79e --- /dev/null +++ b/riscv/insns/vmv8r_v.h @@ -0,0 +1,2 @@ +// vmv8r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h new file mode 100644 index 0000000000..0e6a13e56a --- /dev/null +++ b/riscv/insns/vmv_s_x.h @@ -0,0 +1,29 @@ +// vmv.s.x vd, rs1: vd[0] = x[rs1] (only when vl > 0 and vstart < vl) +require_vector(true); +require(insn.v_vm() == 1); +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +reg_t vl = P.VU.vl; + +if (vl > 0 && P.VU.vstart < vl) { + reg_t rd_num = insn.rd(); + reg_t sew = P.VU.vsew; + + switch(sew) { + case e8: + P.VU.elt(rd_num, 0, true) = RS1; + break; + case e16: + P.VU.elt(rd_num, 0, true) = RS1; + break; + case e32: + P.VU.elt(rd_num, 0, true) = RS1; + break; + default: + P.VU.elt(rd_num, 0, true) = RS1; + break; + } + + vl = 0; +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h new file mode 100644 index 0000000000..a760779107 --- /dev/null +++ b/riscv/insns/vmv_v_i.h @@ -0,0 +1,7 @@ +// vmv.v.i vd, simm5 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + vd = simm5; +}) diff --git
a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h new file mode 100644 index 0000000000..d7f47d0882 --- /dev/null +++ b/riscv/insns/vmv_v_v.h @@ -0,0 +1,7 @@ +// vmv.v.v vd, vs1 +require_vector(true); +VI_CHECK_SSS(true); +VI_VVXI_MERGE_LOOP +({ + vd = vs1; +}) diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h new file mode 100644 index 0000000000..fa7c920be1 --- /dev/null +++ b/riscv/insns/vmv_v_x.h @@ -0,0 +1,7 @@ +// vmv.v.x vd, rs1 +require_vector(true); +VI_CHECK_SSS(false); +VI_VVXI_MERGE_LOOP +({ + vd = rs1; +}) diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h new file mode 100644 index 0000000000..2c03e43e8c --- /dev/null +++ b/riscv/insns/vmv_x_s.h @@ -0,0 +1,31 @@ +// vmv_x_s: rd = vs2[x[rs1]], or 0 when the index is not below vlen / sew +require_vector(true); +require(insn.v_vm() == 1); +uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen()); +reg_t rs1 = RS1; +reg_t sew = P.VU.vsew; +reg_t rs2_num = insn.rs2(); + +if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) { + WRITE_RD(0); +} else { + switch(sew) { + case e8: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e16: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e32: + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + case e64: + if (P.get_max_xlen() <= sew) + WRITE_RD(P.VU.elt(rs2_num, rs1) & xmask); + else + WRITE_RD(P.VU.elt(rs2_num, rs1)); + break; + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h new file mode 100644 index 0000000000..96f0074ce1 --- /dev/null +++ b/riscv/insns/vmvnfr_v.h @@ -0,0 +1,27 @@ +// vmvNr.v vd, vs2 # shared body: copies (rs1 field + 1) whole vector registers +require_vector_novtype(true, true); +const reg_t baseAddr = RS1; +const reg_t vd = insn.rd(); +const reg_t vs2 = insn.rs2(); +const reg_t len = insn.rs1() + 1; +require_align(vd, len); +require_align(vs2, len); +const reg_t size = len * P.VU.vlenb; + +// registers are copied one at a time to keep the commit log correct +if (vd != vs2 && P.VU.vstart < size) { + reg_t i = P.VU.vstart / P.VU.vlenb; + reg_t off = P.VU.vstart % P.VU.vlenb; + if (off) { + memcpy(&P.VU.elt(vd + i,
off, true), + &P.VU.elt(vs2 + i, off), P.VU.vlenb - off); + i++; + } + + for (; i < len; ++i) { + memcpy(&P.VU.elt(vd + i, 0, true), + &P.VU.elt(vs2 + i, 0), P.VU.vlenb); + } +} + +P.VU.vstart = 0; diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h new file mode 100644 index 0000000000..0736d5b21e --- /dev/null +++ b/riscv/insns/vmxnor_mm.h @@ -0,0 +1,2 @@ +// vmxnor.mm vd, vs2, vs1 +VI_LOOP_MASK(~(vs2 ^ vs1)); diff --git a/riscv/insns/vmxor_mm.h b/riscv/insns/vmxor_mm.h new file mode 100644 index 0000000000..7f0c576e37 --- /dev/null +++ b/riscv/insns/vmxor_mm.h @@ -0,0 +1,2 @@ +// vmxor.mm vd, vs2, vs1 +VI_LOOP_MASK(vs2 ^ vs1); diff --git a/riscv/insns/vnclip_wi.h b/riscv/insns/vnclip_wi.h new file mode 100644 index 0000000000..1647212392 --- /dev/null +++ b/riscv/insns/vnclip_wi.h @@ -0,0 +1,25 @@ +// vnclip.wi: vd[i] = clip(round(vs2[i] + rnd) >> zimm5) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + } else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclip_wv.h b/riscv/insns/vnclip_wv.h new file mode 100644 index 0000000000..d87a3378e7 --- /dev/null +++ b/riscv/insns/vnclip_wv.h @@ -0,0 +1,25 @@ +// vnclip.wv: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i]) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = vs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + }
else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, true) diff --git a/riscv/insns/vnclip_wx.h b/riscv/insns/vnclip_wx.h new file mode 100644 index 0000000000..9dbfcd7084 --- /dev/null +++ b/riscv/insns/vnclip_wx.h @@ -0,0 +1,25 @@ +// vnclip.wx: vd[i] = clip(round(vs2[i] + rnd) >> x[rs1]) +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +VI_VVXI_LOOP_NARROW +({ + int128_t result = vs2; + unsigned shift = rs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result < int_min) { + result = int_min; + P.VU.vxsat = 1; + } else if (result > int_max) { + result = int_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclipu_wi.h b/riscv/insns/vnclipu_wi.h new file mode 100644 index 0000000000..8e4e0dad27 --- /dev/null +++ b/riscv/insns/vnclipu_wi.h @@ -0,0 +1,23 @@ +// vnclipu.wi: vd[i] = clip(round(vs2[i] + rnd) >> zimm5) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + // unsigned right shift by the masked immediate + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnclipu_wv.h b/riscv/insns/vnclipu_wv.h new file mode 100644 index 0000000000..f045964f71 --- /dev/null +++ b/riscv/insns/vnclipu_wv.h @@ -0,0 +1,22 @@ +// vnclipu.wv: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i]) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = vs1 & ((sew * 2) - 1); + + // rounding +
INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, true) diff --git a/riscv/insns/vnclipu_wx.h b/riscv/insns/vnclipu_wx.h new file mode 100644 index 0000000000..d5155c11b3 --- /dev/null +++ b/riscv/insns/vnclipu_wx.h @@ -0,0 +1,22 @@ +// vnclipu.wx: vd[i] = clip(round(vs2[i] + rnd) >> x[rs1]) +VRM xrm = P.VU.get_vround_mode(); +uint64_t uint_max = UINT64_MAX >> (64 - P.VU.vsew); +uint64_t sign_mask = UINT64_MAX << P.VU.vsew; +VI_VVXI_LOOP_NARROW +({ + uint128_t result = vs2_u; + unsigned shift = rs1 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); + + result = result >> shift; + + // saturation + if (result & sign_mask) { + result = uint_max; + P.VU.vxsat = 1; + } + + vd = result; +}, false) diff --git a/riscv/insns/vnmsac_vv.h b/riscv/insns/vnmsac_vv.h new file mode 100644 index 0000000000..7c10f29af7 --- /dev/null +++ b/riscv/insns/vnmsac_vv.h @@ -0,0 +1,5 @@ +// vnmsac.vv: vd[i] = -(vs1[i] * vs2[i]) + vd[i] +VI_VV_LOOP +({ + vd = -(vs1 * vs2) + vd; +}) diff --git a/riscv/insns/vnmsac_vx.h b/riscv/insns/vnmsac_vx.h new file mode 100644 index 0000000000..44920be4b2 --- /dev/null +++ b/riscv/insns/vnmsac_vx.h @@ -0,0 +1,5 @@ +// vnmsac.vx: vd[i] = -(x[rs1] * vs2[i]) + vd[i] +VI_VX_LOOP +({ + vd = -(rs1 * vs2) + vd; +}) diff --git a/riscv/insns/vnmsub_vv.h b/riscv/insns/vnmsub_vv.h new file mode 100644 index 0000000000..37f82286c4 --- /dev/null +++ b/riscv/insns/vnmsub_vv.h @@ -0,0 +1,5 @@ +// vnmsub.vv: vd[i] = -(vd[i] * vs1[i]) + vs2[i] +VI_VV_LOOP +({ + vd = -(vd * vs1) + vs2; +}) diff --git a/riscv/insns/vnmsub_vx.h b/riscv/insns/vnmsub_vx.h new file mode 100644 index 0000000000..2e00d22e4a --- /dev/null +++ b/riscv/insns/vnmsub_vx.h @@ -0,0 +1,5 @@ +// vnmsub.vx: vd[i] = -(vd[i] * x[rs1]) + vs2[i] +VI_VX_LOOP +({ + vd = -(vd * rs1) + vs2; +}) diff --git a/riscv/insns/vnsra_wi.h b/riscv/insns/vnsra_wi.h new file mode 100644
index 0000000000..f41979edff --- /dev/null +++ b/riscv/insns/vnsra_wi.h @@ -0,0 +1,5 @@ +// vnsra.wi vd, vs2, zimm5 +VI_VI_LOOP_NSHIFT +({ + vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f); +}, false) diff --git a/riscv/insns/vnsra_wv.h b/riscv/insns/vnsra_wv.h new file mode 100644 index 0000000000..59f255ef30 --- /dev/null +++ b/riscv/insns/vnsra_wv.h @@ -0,0 +1,5 @@ +// vnsra.wv vd, vs2, vs1 +VI_VV_LOOP_NSHIFT +({ + vd = vs2 >> (vs1 & (sew * 2 - 1)); +}, true) diff --git a/riscv/insns/vnsra_wx.h b/riscv/insns/vnsra_wx.h new file mode 100644 index 0000000000..adaa24c384 --- /dev/null +++ b/riscv/insns/vnsra_wx.h @@ -0,0 +1,5 @@ +// vnsra.wx vd, vs2, rs1 +VI_VX_LOOP_NSHIFT +({ + vd = vs2 >> (rs1 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vnsrl_wi.h b/riscv/insns/vnsrl_wi.h new file mode 100644 index 0000000000..91402c0c2a --- /dev/null +++ b/riscv/insns/vnsrl_wi.h @@ -0,0 +1,5 @@ +// vnsrl.wi vd, vs2, zimm5 +VI_VI_LOOP_NSHIFT +({ + vd = vs2_u >> (zimm5 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vnsrl_wv.h b/riscv/insns/vnsrl_wv.h new file mode 100644 index 0000000000..609299faf8 --- /dev/null +++ b/riscv/insns/vnsrl_wv.h @@ -0,0 +1,5 @@ +// vnsrl.wv vd, vs2, vs1 +VI_VV_LOOP_NSHIFT +({ + vd = vs2_u >> (vs1 & (sew * 2 - 1)); +}, true) diff --git a/riscv/insns/vnsrl_wx.h b/riscv/insns/vnsrl_wx.h new file mode 100644 index 0000000000..8356a2bd77 --- /dev/null +++ b/riscv/insns/vnsrl_wx.h @@ -0,0 +1,5 @@ +// vnsrl.wx vd, vs2, rs1 +VI_VX_LOOP_NSHIFT +({ + vd = vs2_u >> (rs1 & (sew * 2 - 1)); +}, false) diff --git a/riscv/insns/vor_vi.h b/riscv/insns/vor_vi.h new file mode 100644 index 0000000000..f759607497 --- /dev/null +++ b/riscv/insns/vor_vi.h @@ -0,0 +1,5 @@ +// vor.vi vd, vs2, simm5 +VI_VI_LOOP +({ + vd = simm5 | vs2; +}) diff --git a/riscv/insns/vor_vv.h b/riscv/insns/vor_vv.h new file mode 100644 index 0000000000..0c460662bf --- /dev/null +++ b/riscv/insns/vor_vv.h @@ -0,0 +1,5 @@ +// vor.vv vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs1 | vs2; +}) diff --git a/riscv/insns/vor_vx.h
b/riscv/insns/vor_vx.h new file mode 100644 index 0000000000..01c003ab35 --- /dev/null +++ b/riscv/insns/vor_vx.h @@ -0,0 +1,5 @@ +// vor.vx vd, vs2, rs1 +VI_VX_LOOP +({ + vd = rs1 | vs2; +}) diff --git a/riscv/insns/vpopc_m.h b/riscv/insns/vpopc_m.h new file mode 100644 index 0000000000..c204b2c0e2 --- /dev/null +++ b/riscv/insns/vpopc_m.h @@ -0,0 +1,23 @@ +// vpopc.m rd, vs2, vm +require(P.VU.vsew >= e8 && P.VU.vsew <= e64); +require_vector(true); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs2_num = insn.rs2(); +require(P.VU.vstart == 0); +reg_t popcount = 0; +for (reg_t i=P.VU.vstart; i(rs2_num, midx ) >> mpos) & 0x1) == 1; + if (insn.v_vm() == 1) { + popcount += vs2_lsb; + } else { + bool do_mask = (P.VU.elt(0, midx) >> mpos) & 0x1; + popcount += (vs2_lsb && do_mask); + } +} +P.VU.vstart = 0; +WRITE_RD(popcount); diff --git a/riscv/insns/vredand_vs.h b/riscv/insns/vredand_vs.h new file mode 100644 index 0000000000..6c2d9089fa --- /dev/null +++ b/riscv/insns/vredand_vs.h @@ -0,0 +1,5 @@ +// vredand.vs vd, vs2, vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res &= vs2; +}) diff --git a/riscv/insns/vredmax_vs.h b/riscv/insns/vredmax_vs.h new file mode 100644 index 0000000000..be2e76ab3a --- /dev/null +++ b/riscv/insns/vredmax_vs.h @@ -0,0 +1,5 @@ +// vredmax.vs vd, vs2, vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredmaxu_vs.h b/riscv/insns/vredmaxu_vs.h new file mode 100644 index 0000000000..960f486181 --- /dev/null +++ b/riscv/insns/vredmaxu_vs.h @@ -0,0 +1,5 @@ +// vredmaxu.vs vd, vs2, vs1 +VI_VV_ULOOP_REDUCTION +({ + vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredmin_vs.h b/riscv/insns/vredmin_vs.h new file mode 100644 index 0000000000..50359b7a53 --- /dev/null +++ b/riscv/insns/vredmin_vs.h @@ -0,0 +1,5 @@ +// vredmin.vs vd, vs2, vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res = (vd_0_res <= vs2) ?
vd_0_res : vs2; +}) diff --git a/riscv/insns/vredminu_vs.h b/riscv/insns/vredminu_vs.h new file mode 100644 index 0000000000..708247592f --- /dev/null +++ b/riscv/insns/vredminu_vs.h @@ -0,0 +1,5 @@ +// vredminu.vs vd, vs2 ,vs1 +VI_VV_ULOOP_REDUCTION +({ + vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2; +}) diff --git a/riscv/insns/vredor_vs.h b/riscv/insns/vredor_vs.h new file mode 100644 index 0000000000..f7acd9aa9d --- /dev/null +++ b/riscv/insns/vredor_vs.h @@ -0,0 +1,5 @@ +// vredor.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res |= vs2; +}) diff --git a/riscv/insns/vredsum_vs.h b/riscv/insns/vredsum_vs.h new file mode 100644 index 0000000000..c4fefe57f6 --- /dev/null +++ b/riscv/insns/vredsum_vs.h @@ -0,0 +1,5 @@ +// vredsum.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vredxor_vs.h b/riscv/insns/vredxor_vs.h new file mode 100644 index 0000000000..bb81ad9a4f --- /dev/null +++ b/riscv/insns/vredxor_vs.h @@ -0,0 +1,5 @@ +// vredxor.vs vd, vs2 ,vs1 +VI_VV_LOOP_REDUCTION +({ + vd_0_res ^= vs2; +}) diff --git a/riscv/insns/vrem_vv.h b/riscv/insns/vrem_vv.h new file mode 100644 index 0000000000..260716a0eb --- /dev/null +++ b/riscv/insns/vrem_vv.h @@ -0,0 +1,11 @@ +// vrem.vv vd, vs2, vs1 +VI_VV_LOOP +({ + if (vs1 == 0) + vd = vs2; + else if(vs2 == -(((intmax_t)1) << (sew - 1)) && vs1 == -1) + vd = 0; + else { + vd = vs2 % vs1; + } +}) diff --git a/riscv/insns/vrem_vx.h b/riscv/insns/vrem_vx.h new file mode 100644 index 0000000000..3702f02f41 --- /dev/null +++ b/riscv/insns/vrem_vx.h @@ -0,0 +1,10 @@ +// vrem.vx vd, vs2, rs1 +VI_VX_LOOP +({ + if (rs1 == 0) + vd = vs2; + else if (vs2 == -(((intmax_t)1) << (sew - 1)) && rs1 == -1) + vd = 0; + else + vd = vs2 % rs1; +}) diff --git a/riscv/insns/vremu_vv.h b/riscv/insns/vremu_vv.h new file mode 100644 index 0000000000..7e1507235a --- /dev/null +++ b/riscv/insns/vremu_vv.h @@ -0,0 +1,8 @@ +// vremu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + if (vs1 == 0) + vd = vs2; + else + vd = 
vs2 % vs1; +}) diff --git a/riscv/insns/vremu_vx.h b/riscv/insns/vremu_vx.h new file mode 100644 index 0000000000..a87a8200a8 --- /dev/null +++ b/riscv/insns/vremu_vx.h @@ -0,0 +1,8 @@ +// vremu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + if (rs1 == 0) + vd = vs2; + else + vd = vs2 % rs1; +}) diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h new file mode 100644 index 0000000000..385e9be973 --- /dev/null +++ b/riscv/insns/vrgather_vi.h @@ -0,0 +1,30 @@ +// vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_vm; + +reg_t zimm5 = insn.v_zimm5(); + +VI_LOOP_BASE + +for (reg_t i = P.VU.vstart; i < vl; ++i) { + VI_LOOP_ELEMENT_SKIP(); + + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + case e16: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + case e32: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + default: + P.VU.elt(rd_num, i, true) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, zimm5); + break; + } +} + +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h new file mode 100644 index 0000000000..a3a32f560f --- /dev/null +++ b/riscv/insns/vrgather_vv.h @@ -0,0 +1,32 @@ +// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require_align(insn.rs1(), P.VU.vflmul); +require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1()); +require_vm; + +VI_LOOP_BASE + switch (sew) { + case e8: { + auto vs1 = P.VU.elt(rs1_num, i); + //if (i > 255) continue; + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 
0 : P.VU.elt(rs2_num, vs1); + break; + } + case e16: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e32: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + default: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + } +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h new file mode 100644 index 0000000000..058ffae104 --- /dev/null +++ b/riscv/insns/vrgather_vx.h @@ -0,0 +1,24 @@ +// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1]; +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require(insn.rd() != insn.rs2()); +require_vm; + +reg_t rs1 = RS1; + +VI_LOOP_BASE + switch (sew) { + case e8: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + case e16: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + case e32: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + default: + P.VU.elt(rd_num, i, true) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, rs1); + break; + } +VI_LOOP_END; diff --git a/riscv/insns/vrgatherei16_vv.h b/riscv/insns/vrgatherei16_vv.h new file mode 100644 index 0000000000..3bb166a237 --- /dev/null +++ b/riscv/insns/vrgatherei16_vv.h @@ -0,0 +1,34 @@ +// vrgatherei16.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; +float vemul = (16.0 / P.VU.vsew * P.VU.vflmul); +require(vemul >= 0.125 && vemul <= 8); +require_align(insn.rd(), P.VU.vflmul); +require_align(insn.rs2(), P.VU.vflmul); +require_align(insn.rs1(), vemul); +require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), vemul); +require(insn.rd() != insn.rs2()); +require_vm; + +VI_LOOP_BASE + switch (sew) { + case e8: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e16: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + case e32: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + default: { + auto vs1 = P.VU.elt(rs1_num, i); + P.VU.elt(rd_num, i, true) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt(rs2_num, vs1); + break; + } + } +VI_LOOP_END; diff --git a/riscv/insns/vrsub_vi.h b/riscv/insns/vrsub_vi.h new file mode 100644 index 0000000000..198c33f927 --- /dev/null +++ b/riscv/insns/vrsub_vi.h @@ -0,0 +1,5 @@ +// vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i] +VI_VI_LOOP +({ + vd = simm5 - vs2; +}) diff --git a/riscv/insns/vrsub_vx.h b/riscv/insns/vrsub_vx.h new file mode 100644 index 0000000000..bfd62594db --- /dev/null +++ b/riscv/insns/vrsub_vx.h @@ -0,0 +1,5 @@ +// vrsub.vx vd, vs2, rs1, vm # vd[i] = rs1 - vs2[i] +VI_VX_LOOP +({ + vd = rs1 - vs2; +}) diff --git a/riscv/insns/vs1r_v.h b/riscv/insns/vs1r_v.h new file mode 100644 index 0000000000..1932ec0b88 --- /dev/null +++ b/riscv/insns/vs1r_v.h @@ -0,0 +1,2 @@ +// vs1r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs2r_v.h b/riscv/insns/vs2r_v.h new file mode 100644 index 0000000000..2e515b476c --- /dev/null +++ b/riscv/insns/vs2r_v.h @@ -0,0 +1,2 @@ +// vs2r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs4r_v.h b/riscv/insns/vs4r_v.h new file mode 100644 index 0000000000..161bf89a31 --- /dev/null +++ 
b/riscv/insns/vs4r_v.h @@ -0,0 +1,2 @@ +// vs4r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vs8r_v.h b/riscv/insns/vs8r_v.h new file mode 100644 index 0000000000..1ad2575638 --- /dev/null +++ b/riscv/insns/vs8r_v.h @@ -0,0 +1,2 @@ +// vs8r.v vs3, (rs1) +VI_ST_WHOLE diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h new file mode 100644 index 0000000000..c361f087f7 --- /dev/null +++ b/riscv/insns/vsadd_vi.h @@ -0,0 +1,28 @@ +// vsadd.vi vd, vs2 simm5 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VI_PARAMS(e8); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +case e16: { + VI_PARAMS(e16); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +case e32: { + VI_PARAMS(e32); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +default: { + VI_PARAMS(e64); + vd = sat_add(vs2, vsext(simm5, sew), sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h new file mode 100644 index 0000000000..ce0ef4071b --- /dev/null +++ b/riscv/insns/vsadd_vv.h @@ -0,0 +1,29 @@ +// vsadd.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VV_PARAMS(e8); + vd = sat_add(vs2, vs1, sat); + break; +} +case e16: { + VV_PARAMS(e16); + vd = sat_add(vs2, vs1, sat); + break; +} +case e32: { + VV_PARAMS(e32); + vd = sat_add(vs2, vs1, sat); + break; +} +default: { + VV_PARAMS(e64); + vd = sat_add(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END + diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h new file mode 100644 index 0000000000..691f017fff --- /dev/null +++ b/riscv/insns/vsadd_vx.h @@ -0,0 +1,28 @@ +// vsadd.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; +switch(sew) { +case e8: { + VX_PARAMS(e8); + vd = sat_add(vs2, rs1, sat); + break; +} +case e16: { + VX_PARAMS(e16); + vd = sat_add(vs2, rs1, sat); + break; +} +case e32: { + VX_PARAMS(e32); + vd = sat_add(vs2, rs1, sat); + 
break; +} +default: { + VX_PARAMS(e64); + vd = sat_add(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsaddu_vi.h b/riscv/insns/vsaddu_vi.h new file mode 100644 index 0000000000..7a200dff74 --- /dev/null +++ b/riscv/insns/vsaddu_vi.h @@ -0,0 +1,11 @@ +// vsaddu.vi vd, vs2, simm5 +VI_VI_ULOOP +({ + bool sat = false; + vd = vs2 + (insn.v_simm5() & (UINT64_MAX >> (64 - P.VU.vsew))); + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; +}) diff --git a/riscv/insns/vsaddu_vv.h b/riscv/insns/vsaddu_vv.h new file mode 100644 index 0000000000..e5d7025f05 --- /dev/null +++ b/riscv/insns/vsaddu_vv.h @@ -0,0 +1,11 @@ +// vsaddu.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + bool sat = false; + vd = vs2 + vs1; + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; +}) diff --git a/riscv/insns/vsaddu_vx.h b/riscv/insns/vsaddu_vx.h new file mode 100644 index 0000000000..46ec29d9b7 --- /dev/null +++ b/riscv/insns/vsaddu_vx.h @@ -0,0 +1,12 @@ +// vsaddu.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + bool sat = false; + vd = vs2 + rs1; + + sat = vd < vs2; + vd |= -(vd < vs2); + + P.VU.vxsat |= sat; + +}) diff --git a/riscv/insns/vsbc_vvm.h b/riscv/insns/vsbc_vvm.h new file mode 100644 index 0000000000..96b8bb8041 --- /dev/null +++ b/riscv/insns/vsbc_vvm.h @@ -0,0 +1,10 @@ +// vsbc.vvm vd, vs2, vs1, v0 +VI_VV_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs2) - (op_mask & vs1) - carry; + vd = res; +}) diff --git a/riscv/insns/vsbc_vxm.h b/riscv/insns/vsbc_vxm.h new file mode 100644 index 0000000000..c6f9ca82a2 --- /dev/null +++ b/riscv/insns/vsbc_vxm.h @@ -0,0 +1,10 @@ +// vsbc.vxm vd, vs2, rs1, v0 +VI_XI_LOOP_WITH_CARRY +({ + auto &v0 = P.VU.elt(0, midx); + const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); + uint64_t carry = (v0 >> mpos) & 0x1; + + uint128_t res = (op_mask & vs2) - (op_mask & rs1) - carry; + vd = res;
+}) diff --git a/riscv/insns/vse16_v.h b/riscv/insns/vse16_v.h new file mode 100644 index 0000000000..20b04c869b --- /dev/null +++ b/riscv/insns/vse16_v.h @@ -0,0 +1,2 @@ +// vse16.v and vsseg[2-8]e16.v +VI_ST(0, (i * nf + fn), uint16); diff --git a/riscv/insns/vse32_v.h b/riscv/insns/vse32_v.h new file mode 100644 index 0000000000..efd2973d07 --- /dev/null +++ b/riscv/insns/vse32_v.h @@ -0,0 +1,2 @@ +// vse32.v and vsseg[2-8]e32.v +VI_ST(0, (i * nf + fn), uint32); diff --git a/riscv/insns/vse64_v.h b/riscv/insns/vse64_v.h new file mode 100644 index 0000000000..9b36c8d8d2 --- /dev/null +++ b/riscv/insns/vse64_v.h @@ -0,0 +1,2 @@ +// vse64.v and vsseg[2-8]e64.v +VI_ST(0, (i * nf + fn), uint64); diff --git a/riscv/insns/vse8_v.h b/riscv/insns/vse8_v.h new file mode 100644 index 0000000000..32dee14b4c --- /dev/null +++ b/riscv/insns/vse8_v.h @@ -0,0 +1,2 @@ +// vse8.v and vsseg[2-8]e8.v +VI_ST(0, (i * nf + fn), uint8); diff --git a/riscv/insns/vsetvl.h b/riscv/insns/vsetvl.h new file mode 100644 index 0000000000..2969edc6ce --- /dev/null +++ b/riscv/insns/vsetvl.h @@ -0,0 +1,2 @@ +require_vector_novtype(false, false); +WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, RS2)); diff --git a/riscv/insns/vsetvli.h b/riscv/insns/vsetvli.h new file mode 100644 index 0000000000..7b1f1d716c --- /dev/null +++ b/riscv/insns/vsetvli.h @@ -0,0 +1,2 @@ +require_vector_novtype(false, false); +WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, insn.v_zimm11())); diff --git a/riscv/insns/vsext_vf2.h b/riscv/insns/vsext_vf2.h new file mode 100644 index 0000000000..16ccfac607 --- /dev/null +++ b/riscv/insns/vsext_vf2.h @@ -0,0 +1 @@ +VI_VV_EXT(2, int); diff --git a/riscv/insns/vsext_vf4.h b/riscv/insns/vsext_vf4.h new file mode 100644 index 0000000000..d4476a310f --- /dev/null +++ b/riscv/insns/vsext_vf4.h @@ -0,0 +1 @@ +VI_VV_EXT(4, int); diff --git a/riscv/insns/vsext_vf8.h b/riscv/insns/vsext_vf8.h new file mode 100644 index 0000000000..09fdc2c75c --- /dev/null +++ 
b/riscv/insns/vsext_vf8.h new file mode 100644 index 0000000000..09fdc2c75c --- /dev/null +++ b/riscv/insns/vsext_vf8.h @@ -0,0 +1 @@ +VI_VV_EXT(8, int); diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h new file mode 100644 index 0000000000..e867722fa8 --- /dev/null +++ b/riscv/insns/vslide1down_vx.h @@ -0,0 +1,44 @@ +// vslide1down.vx vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +VI_LOOP_BASE +if (i != vl - 1) { + switch (sew) { + case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, 1); + vd = vs2; + } + break; + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; + case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, 1); + vd = vs2; + } + break; + default: { + VI_XI_SLIDEDOWN_PARAMS(e64, 1); + vd = vs2; + } + break; + } +} else { + switch (sew) { + case e8: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + case e16: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + case e32: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + default: + P.VU.elt(rd_num, vl - 1, true) = RS1; + break; + } +} +VI_LOOP_END diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h new file mode 100644 index 0000000000..33cb9ed641 --- /dev/null +++ b/riscv/insns/vslide1up_vx.h @@ -0,0 +1,30 @@ +// vslide1up.vx vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +VI_LOOP_BASE +if (i != 0) { + if (sew == e8) { + VI_XI_SLIDEUP_PARAMS(e8, 1); + vd = vs2; + } else if(sew == e16) { + VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } else if(sew == e32) { + VI_XI_SLIDEUP_PARAMS(e32, 1); + vd = vs2; + } else if(sew == e64) { + VI_XI_SLIDEUP_PARAMS(e64, 1); + vd = vs2; + } +} else { + if (sew == e8) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e16) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e32) { + P.VU.elt(rd_num, 0, true) = RS1; + } else if(sew == e64) { + P.VU.elt(rd_num, 0, true) = RS1; + } +} +VI_LOOP_END diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h new file mode 100644 index 0000000000..bc440cf2bd --- /dev/null +++ b/riscv/insns/vslidedown_vi.h @@ -0,0 +1,36 @@ +// vslidedown.vi vd, vs2, zimm5 +VI_CHECK_SLIDE(false); + +const
reg_t sh = insn.v_zimm5(); +VI_LOOP_BASE + +reg_t offset = 0; +bool is_valid = (i + sh) < P.VU.vlmax; + +if (is_valid) { + offset = sh; +} + +switch (sew) { +case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, offset); + vd = is_valid ? vs2 : 0; +} +break; +default: { + VI_XI_SLIDEDOWN_PARAMS(e64, offset); + vd = is_valid ? vs2 : 0; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h new file mode 100644 index 0000000000..074aa50868 --- /dev/null +++ b/riscv/insns/vslidedown_vx.h @@ -0,0 +1,36 @@ +//vslidedown.vx vd, vs2, rs1 +VI_CHECK_SLIDE(false); + +const uint128_t sh = RS1; +VI_LOOP_BASE + +reg_t offset = 0; +bool is_valid = (i + sh) < P.VU.vlmax; + +if (is_valid) { + offset = sh; +} + +switch (sew) { +case e8: { + VI_XI_SLIDEDOWN_PARAMS(e8, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, offset); + vd = is_valid ? vs2 : 0; +} +break; +case e32: { + VI_XI_SLIDEDOWN_PARAMS(e32, offset); + vd = is_valid ? vs2 : 0; +} +break; +default: { + VI_XI_SLIDEDOWN_PARAMS(e64, offset); + vd = is_valid ? 
vs2 : 0; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h new file mode 100644 index 0000000000..42657892af --- /dev/null +++ b/riscv/insns/vslideup_vi.h @@ -0,0 +1,31 @@ +// vslideup.vi vd, vs2, zimm5 +VI_CHECK_SLIDE(true); + +const reg_t offset = insn.v_zimm5(); +VI_LOOP_BASE +if (P.VU.vstart < offset && i < offset) + continue; + +switch (sew) { +case e8: { + VI_XI_SLIDEUP_PARAMS(e8, offset); + vd = vs2; +} +break; +case e16: { + VI_XI_SLIDEUP_PARAMS(e16, offset); + vd = vs2; +} +break; +case e32: { + VI_XI_SLIDEUP_PARAMS(e32, offset); + vd = vs2; +} +break; +default: { + VI_XI_SLIDEUP_PARAMS(e64, offset); + vd = vs2; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h new file mode 100644 index 0000000000..720d2ab53b --- /dev/null +++ b/riscv/insns/vslideup_vx.h @@ -0,0 +1,31 @@ +// vslideup.vx vd, vs2, rs1 +VI_CHECK_SLIDE(true); + +const reg_t offset = RS1; +VI_LOOP_BASE +if (P.VU.vstart < offset && i < offset) + continue; + +switch (sew) { +case e8: { + VI_XI_SLIDEUP_PARAMS(e8, offset); + vd = vs2; +} +break; +case e16: { + VI_XI_SLIDEUP_PARAMS(e16, offset); + vd = vs2; +} +break; +case e32: { + VI_XI_SLIDEUP_PARAMS(e32, offset); + vd = vs2; +} +break; +default: { + VI_XI_SLIDEUP_PARAMS(e64, offset); + vd = vs2; +} +break; +} +VI_LOOP_END diff --git a/riscv/insns/vsll_vi.h b/riscv/insns/vsll_vi.h new file mode 100644 index 0000000000..be4650669f --- /dev/null +++ b/riscv/insns/vsll_vi.h @@ -0,0 +1,5 @@ +// vsll.vi vd, vs2, zimm5 +VI_VI_LOOP +({ + vd = vs2 << (simm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsll_vv.h b/riscv/insns/vsll_vv.h new file mode 100644 index 0000000000..ce82022504 --- /dev/null +++ b/riscv/insns/vsll_vv.h @@ -0,0 +1,5 @@ +// vsll.vv vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs2 << (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsll_vx.h b/riscv/insns/vsll_vx.h new file mode 100644 index 0000000000..823510b2c5 --- /dev/null +++ b/riscv/insns/vsll_vx.h @@ -0,0 +1,5 @@
+// vsll +VI_VX_LOOP +({ + vd = vs2 << (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h new file mode 100644 index 0000000000..e7ce306e9e --- /dev/null +++ b/riscv/insns/vsmul_vv.h @@ -0,0 +1,32 @@ +// vsmul.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1); + +VI_VV_LOOP +({ + int64_t vs1_sign; + int64_t vs2_sign; + int64_t result_sign; + + vs1_sign = vs1 & sign_mask; + vs2_sign = vs2 & sign_mask; + bool overflow = vs1 == vs2 && vs1 == int_min; + + int128_t result = (int128_t)vs1 * (int128_t)vs2; + result_sign = (vs1_sign ^ vs2_sign) & sign_mask; + + // rounding + INT_ROUNDING(result, xrm, sew - 1); + // remove guard bits + result = result >> (sew - 1); + + // saturation + if (overflow) { + result = int_max; + P.VU.vxsat |= 1; + } + + vd = result; +}) diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h new file mode 100644 index 0000000000..cf4b511423 --- /dev/null +++ b/riscv/insns/vsmul_vx.h @@ -0,0 +1,33 @@ +// vsmul.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +int64_t int_max = INT64_MAX >> (64 - P.VU.vsew); +int64_t int_min = INT64_MIN >> (64 - P.VU.vsew); +int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1); + +VI_VX_LOOP +({ + int64_t rs1_sign; + int64_t vs2_sign; + int64_t result_sign; + + rs1_sign = rs1 & sign_mask; + vs2_sign = vs2 & sign_mask; + bool overflow = rs1 == vs2 && rs1 == int_min; + + int128_t result = (int128_t)rs1 * (int128_t)vs2; + result_sign = (rs1_sign ^ vs2_sign) & sign_mask; + + // rounding + INT_ROUNDING(result, xrm, sew - 1); + + // remove guard bits + result = result >> (sew - 1); + + // max saturation + if (overflow) { + result = int_max; + P.VU.vxsat |= 1; + } + + vd = result; +}) diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h new file mode 100644 index 0000000000..5c589274ef --- /dev/null +++ b/riscv/insns/vsra_vi.h @@ 
-0,0 +1,5 @@ +// vsra.vi vd, vs2, zimm5 +VI_VI_LOOP +({ + vd = vs2 >> (simm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsra_vv.h b/riscv/insns/vsra_vv.h new file mode 100644 index 0000000000..8889af9c08 --- /dev/null +++ b/riscv/insns/vsra_vv.h @@ -0,0 +1,5 @@ +// vsra.vv vd, vs2, vs1 +VI_VV_LOOP +({ + vd = vs2 >> (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsra_vx.h b/riscv/insns/vsra_vx.h new file mode 100644 index 0000000000..c1b0c10728 --- /dev/null +++ b/riscv/insns/vsra_vx.h @@ -0,0 +1,5 @@ +// vsra.vx vd, vs2, rs1 +VI_VX_LOOP +({ + vd = vs2 >> (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsrl_vi.h b/riscv/insns/vsrl_vi.h new file mode 100644 index 0000000000..fe5d272025 --- /dev/null +++ b/riscv/insns/vsrl_vi.h @@ -0,0 +1,5 @@ +// vsrl.vi vd, vs2, zimm5 +VI_VI_ULOOP +({ + vd = vs2 >> (zimm5 & (sew - 1) & 0x1f); +}) diff --git a/riscv/insns/vsrl_vv.h b/riscv/insns/vsrl_vv.h new file mode 100644 index 0000000000..6376af36bc --- /dev/null +++ b/riscv/insns/vsrl_vv.h @@ -0,0 +1,5 @@ +// vsrl.vv vd, vs2, vs1 +VI_VV_ULOOP +({ + vd = vs2 >> (vs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsrl_vx.h b/riscv/insns/vsrl_vx.h new file mode 100644 index 0000000000..a4f899ca2c --- /dev/null +++ b/riscv/insns/vsrl_vx.h @@ -0,0 +1,5 @@ +// vsrl.vx vd, vs2, rs1 +VI_VX_ULOOP +({ + vd = vs2 >> (rs1 & (sew - 1)); +}) diff --git a/riscv/insns/vsse16_v.h b/riscv/insns/vsse16_v.h new file mode 100644 index 0000000000..adbbcf5c5d --- /dev/null +++ b/riscv/insns/vsse16_v.h @@ -0,0 +1,2 @@ +// vsse16.v and vssseg[2-8]e16.v +VI_ST(i * RS2, fn, uint16); diff --git a/riscv/insns/vsse32_v.h b/riscv/insns/vsse32_v.h new file mode 100644 index 0000000000..73bd272b0a --- /dev/null +++ b/riscv/insns/vsse32_v.h @@ -0,0 +1,2 @@ +// vsse32.v and vssseg[2-8]e32.v +VI_ST(i * RS2, fn, uint32); diff --git a/riscv/insns/vsse64_v.h b/riscv/insns/vsse64_v.h new file mode 100644 index 0000000000..1785a56877 --- /dev/null +++ b/riscv/insns/vsse64_v.h @@ -0,0 +1,2 @@ +// vsse64.v and
vssseg[2-8]e64.v +VI_ST(i * RS2, fn, uint64); diff --git a/riscv/insns/vsse8_v.h b/riscv/insns/vsse8_v.h new file mode 100644 index 0000000000..c5daf0bce8 --- /dev/null +++ b/riscv/insns/vsse8_v.h @@ -0,0 +1,2 @@ +// vsse8.v and vssseg[2-8]e8.v +VI_ST(i * RS2, fn, uint8); diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h new file mode 100644 index 0000000000..ff2e1c585c --- /dev/null +++ b/riscv/insns/vssra_vi.h @@ -0,0 +1,10 @@ +// vssra.vi vd, vs2, simm5 +VRM xrm = P.VU.get_vround_mode(); +VI_VI_LOOP +({ + int sh = simm5 & (sew - 1) & 0x1f; + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h new file mode 100644 index 0000000000..7bbc766ff1 --- /dev/null +++ b/riscv/insns/vssra_vv.h @@ -0,0 +1,10 @@ +// vssra.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VV_LOOP +({ + int sh = vs1 & (sew - 1); + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h new file mode 100644 index 0000000000..068a22b692 --- /dev/null +++ b/riscv/insns/vssra_vx.h @@ -0,0 +1,10 @@ +// vssra.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VX_LOOP +({ + int sh = rs1 & (sew - 1); + int128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h new file mode 100644 index 0000000000..d125164d6a --- /dev/null +++ b/riscv/insns/vssrl_vi.h @@ -0,0 +1,10 @@ +// vssrl.vi vd, vs2, zimm5 +VRM xrm = P.VU.get_vround_mode(); +VI_VI_ULOOP +({ + int sh = zimm5 & (sew - 1) & 0x1f; + uint128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h new file mode 100644 index 0000000000..a8e5d16423 --- /dev/null +++ b/riscv/insns/vssrl_vv.h @@ -0,0 +1,10 @@ +// vssrl.vv vd, vs2, vs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VV_ULOOP +({ + int sh = vs1 & (sew - 1); + uint128_t val =
vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h new file mode 100644 index 0000000000..ee3cb3462f --- /dev/null +++ b/riscv/insns/vssrl_vx.h @@ -0,0 +1,10 @@ +// vssrl.vx vd, vs2, rs1 +VRM xrm = P.VU.get_vround_mode(); +VI_VX_ULOOP +({ + int sh = rs1 & (sew - 1); + uint128_t val = vs2; + + INT_ROUNDING(val, xrm, sh); + vd = val >> sh; +}) diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h new file mode 100644 index 0000000000..18fe4fb5f0 --- /dev/null +++ b/riscv/insns/vssub_vv.h @@ -0,0 +1,29 @@ +// vssub.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VV_PARAMS(e8); + vd = sat_sub(vs2, vs1, sat); + break; +} +case e16: { + VV_PARAMS(e16); + vd = sat_sub(vs2, vs1, sat); + break; +} +case e32: { + VV_PARAMS(e32); + vd = sat_sub(vs2, vs1, sat); + break; +} +default: { + VV_PARAMS(e64); + vd = sat_sub(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h new file mode 100644 index 0000000000..7a01125644 --- /dev/null +++ b/riscv/insns/vssub_vx.h @@ -0,0 +1,29 @@ +// vssub.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VX_PARAMS(e8); + vd = sat_sub(vs2, rs1, sat); + break; +} +case e16: { + VX_PARAMS(e16); + vd = sat_sub(vs2, rs1, sat); + break; +} +case e32: { + VX_PARAMS(e32); + vd = sat_sub(vs2, rs1, sat); + break; +} +default: { + VX_PARAMS(e64); + vd = sat_sub(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h new file mode 100644 index 0000000000..e58076ebe4 --- /dev/null +++ b/riscv/insns/vssubu_vv.h @@ -0,0 +1,30 @@ +// vssubu.vv vd, vs2, vs1 +VI_CHECK_SSS(true); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VV_U_PARAMS(e8); + vd = sat_subu(vs2, vs1, sat); + break; +} +case e16: { + VV_U_PARAMS(e16); + vd = 
sat_subu(vs2, vs1, sat); + break; +} +case e32: { + VV_U_PARAMS(e32); + vd = sat_subu(vs2, vs1, sat); + break; +} +default: { + VV_U_PARAMS(e64); + vd = sat_subu(vs2, vs1, sat); + break; +} +} +P.VU.vxsat |= sat; + +VI_LOOP_END diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h new file mode 100644 index 0000000000..556c759f59 --- /dev/null +++ b/riscv/insns/vssubu_vx.h @@ -0,0 +1,29 @@ +// vssubu.vx vd, vs2, rs1 +VI_CHECK_SSS(false); +VI_LOOP_BASE +bool sat = false; + +switch (sew) { +case e8: { + VX_U_PARAMS(e8); + vd = sat_subu(vs2, rs1, sat); + break; +} +case e16: { + VX_U_PARAMS(e16); + vd = sat_subu(vs2, rs1, sat); + break; +} +case e32: { + VX_U_PARAMS(e32); + vd = sat_subu(vs2, rs1, sat); + break; +} +default: { + VX_U_PARAMS(e64); + vd = sat_subu(vs2, rs1, sat); + break; +} +} +P.VU.vxsat |= sat; +VI_LOOP_END diff --git a/riscv/insns/vsub_vv.h b/riscv/insns/vsub_vv.h new file mode 100644 index 0000000000..7d119d50fd --- /dev/null +++ b/riscv/insns/vsub_vv.h @@ -0,0 +1,5 @@ +// vsub.vv: vd[i] = vs2[i] - vs1[i] +VI_VV_LOOP +({ + vd = vs2 - vs1; +}) diff --git a/riscv/insns/vsub_vx.h b/riscv/insns/vsub_vx.h new file mode 100644 index 0000000000..e075b42370 --- /dev/null +++ b/riscv/insns/vsub_vx.h @@ -0,0 +1,5 @@ +// vsub.vx: vd[i] = vs2[i] - x[rs1] +VI_VX_LOOP +({ + vd = vs2 - rs1; +}) diff --git a/riscv/insns/vsuxei16_v.h b/riscv/insns/vsuxei16_v.h new file mode 100644 index 0000000000..f361c03f81 --- /dev/null +++ b/riscv/insns/vsuxei16_v.h @@ -0,0 +1,2 @@ +// vsuxei16.v +VI_ST_INDEX(e16, false); diff --git a/riscv/insns/vsuxei32_v.h b/riscv/insns/vsuxei32_v.h new file mode 100644 index 0000000000..c1c8dc7c51 --- /dev/null +++ b/riscv/insns/vsuxei32_v.h @@ -0,0 +1,2 @@ +// vsuxei32.v +VI_ST_INDEX(e32, false); diff --git a/riscv/insns/vsuxei64_v.h b/riscv/insns/vsuxei64_v.h new file mode 100644 index 0000000000..0c619cf1d8 --- /dev/null +++ b/riscv/insns/vsuxei64_v.h @@ -0,0 +1,2 @@ +// vsuxei64.v +VI_ST_INDEX(e64, false); diff --git a/riscv/insns/vsuxei8_v.h
b/riscv/insns/vsuxei8_v.h new file mode 100644 index 0000000000..36d2a11c01 --- /dev/null +++ b/riscv/insns/vsuxei8_v.h @@ -0,0 +1,2 @@ +// vsuxei8.v +VI_ST_INDEX(e8, false); diff --git a/riscv/insns/vsxei16_v.h b/riscv/insns/vsxei16_v.h new file mode 100644 index 0000000000..42c3c78dbb --- /dev/null +++ b/riscv/insns/vsxei16_v.h @@ -0,0 +1,2 @@ +// vsxei16.v and vsxseg[2-8]ei16.v +VI_ST_INDEX(e16, true); diff --git a/riscv/insns/vsxei32_v.h b/riscv/insns/vsxei32_v.h new file mode 100644 index 0000000000..f0aed6bd6e --- /dev/null +++ b/riscv/insns/vsxei32_v.h @@ -0,0 +1,2 @@ +// vsxei32.v and vsxseg[2-8]ei32.v +VI_ST_INDEX(e32, true); diff --git a/riscv/insns/vsxei64_v.h b/riscv/insns/vsxei64_v.h new file mode 100644 index 0000000000..88ddaf3fd7 --- /dev/null +++ b/riscv/insns/vsxei64_v.h @@ -0,0 +1,2 @@ +// vsxei64.v and vsxseg[2-8]ei64.v +VI_ST_INDEX(e64, true); diff --git a/riscv/insns/vsxei8_v.h b/riscv/insns/vsxei8_v.h new file mode 100644 index 0000000000..621512c50c --- /dev/null +++ b/riscv/insns/vsxei8_v.h @@ -0,0 +1,2 @@ +// vsxei8.v and vsxseg[2-8]ei8.v +VI_ST_INDEX(e8, true); diff --git a/riscv/insns/vwadd_vv.h b/riscv/insns/vwadd_vv.h new file mode 100644 index 0000000000..df4a13534d --- /dev/null +++ b/riscv/insns/vwadd_vv.h @@ -0,0 +1,6 @@ +// vwadd.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, int); +}) diff --git a/riscv/insns/vwadd_vx.h b/riscv/insns/vwadd_vx.h new file mode 100644 index 0000000000..c226389342 --- /dev/null +++ b/riscv/insns/vwadd_vx.h @@ -0,0 +1,6 @@ +// vwadd.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, int); +}) diff --git a/riscv/insns/vwadd_wv.h b/riscv/insns/vwadd_wv.h new file mode 100644 index 0000000000..54d2ba4072 --- /dev/null +++ b/riscv/insns/vwadd_wv.h @@ -0,0 +1,6 @@ +// vwadd.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, +, int); +}) diff --git a/riscv/insns/vwadd_wx.h
b/riscv/insns/vwadd_wx.h new file mode 100644 index 0000000000..bb4cee5100 --- /dev/null +++ b/riscv/insns/vwadd_wx.h @@ -0,0 +1,6 @@ +// vwaddu.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, +, int); +}) diff --git a/riscv/insns/vwaddu_vv.h b/riscv/insns/vwaddu_vv.h new file mode 100644 index 0000000000..286ebc858e --- /dev/null +++ b/riscv/insns/vwaddu_vv.h @@ -0,0 +1,6 @@ +// vwaddu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, uint); +}) diff --git a/riscv/insns/vwaddu_vx.h b/riscv/insns/vwaddu_vx.h new file mode 100644 index 0000000000..61cddfc897 --- /dev/null +++ b/riscv/insns/vwaddu_vx.h @@ -0,0 +1,6 @@ +// vwaddu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, uint); +}) diff --git a/riscv/insns/vwaddu_wv.h b/riscv/insns/vwaddu_wv.h new file mode 100644 index 0000000000..fee813657e --- /dev/null +++ b/riscv/insns/vwaddu_wv.h @@ -0,0 +1,6 @@ +// vwaddu.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, +, uint); +}) diff --git a/riscv/insns/vwaddu_wx.h b/riscv/insns/vwaddu_wx.h new file mode 100644 index 0000000000..0073ac35c5 --- /dev/null +++ b/riscv/insns/vwaddu_wx.h @@ -0,0 +1,6 @@ +// vwaddu.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, +, uint); +}) diff --git a/riscv/insns/vwmacc_vv.h b/riscv/insns/vwmacc_vv.h new file mode 100644 index 0000000000..7208c6d696 --- /dev/null +++ b/riscv/insns/vwmacc_vv.h @@ -0,0 +1,6 @@ +// vwmacc.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, int); +}) diff --git a/riscv/insns/vwmacc_vx.h b/riscv/insns/vwmacc_vx.h new file mode 100644 index 0000000000..5ae597a267 --- /dev/null +++ b/riscv/insns/vwmacc_vx.h @@ -0,0 +1,6 @@ +// vwmacc.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, int); +}) diff 
--git a/riscv/insns/vwmaccsu_vv.h b/riscv/insns/vwmaccsu_vv.h new file mode 100644 index 0000000000..3aa43ef44d --- /dev/null +++ b/riscv/insns/vwmaccsu_vv.h @@ -0,0 +1,6 @@ +// vwmaccsu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, vs1, vd_w, *, +, int, uint, int); +}) diff --git a/riscv/insns/vwmaccsu_vx.h b/riscv/insns/vwmaccsu_vx.h new file mode 100644 index 0000000000..e00a21ddc1 --- /dev/null +++ b/riscv/insns/vwmaccsu_vx.h @@ -0,0 +1,6 @@ +// vwmaccsu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, uint, int); +}) diff --git a/riscv/insns/vwmaccu_vv.h b/riscv/insns/vwmaccu_vv.h new file mode 100644 index 0000000000..2cbdaa312b --- /dev/null +++ b/riscv/insns/vwmaccu_vv.h @@ -0,0 +1,6 @@ +// vwmaccu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, uint); +}) diff --git a/riscv/insns/vwmaccu_vx.h b/riscv/insns/vwmaccu_vx.h new file mode 100644 index 0000000000..533297f3fa --- /dev/null +++ b/riscv/insns/vwmaccu_vx.h @@ -0,0 +1,6 @@ +// vwmaccu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, uint); +}) diff --git a/riscv/insns/vwmaccus_vx.h b/riscv/insns/vwmaccus_vx.h new file mode 100644 index 0000000000..5310f0e9be --- /dev/null +++ b/riscv/insns/vwmaccus_vx.h @@ -0,0 +1,6 @@ +// vwmaccus.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, int, uint); +}) diff --git a/riscv/insns/vwmul_vv.h b/riscv/insns/vwmul_vv.h new file mode 100644 index 0000000000..2197edbfbf --- /dev/null +++ b/riscv/insns/vwmul_vv.h @@ -0,0 +1,6 @@ +// vwmul.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, int); +}) diff --git a/riscv/insns/vwmul_vx.h b/riscv/insns/vwmul_vx.h new file mode 100644 index 0000000000..bc1422d400 --- /dev/null +++ 
b/riscv/insns/vwmul_vx.h @@ -0,0 +1,6 @@ +// vwmul.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, int); +}) diff --git a/riscv/insns/vwmulsu_vv.h b/riscv/insns/vwmulsu_vv.h new file mode 100644 index 0000000000..5f84721f6f --- /dev/null +++ b/riscv/insns/vwmulsu_vv.h @@ -0,0 +1,16 @@ +// vwmulsu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + switch(P.VU.vsew) { + case e8: + P.VU.elt(rd_num, i, true) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)vs1; + break; + case e16: + P.VU.elt(rd_num, i, true) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)vs1; + break; + default: + P.VU.elt(rd_num, i, true) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)vs1; + break; + } +}) diff --git a/riscv/insns/vwmulsu_vx.h b/riscv/insns/vwmulsu_vx.h new file mode 100644 index 0000000000..68d6d276ea --- /dev/null +++ b/riscv/insns/vwmulsu_vx.h @@ -0,0 +1,16 @@ +// vwmulsu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + switch(P.VU.vsew) { + case e8: + P.VU.elt(rd_num, i, true) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)rs1; + break; + case e16: + P.VU.elt(rd_num, i, true) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)rs1; + break; + default: + P.VU.elt(rd_num, i, true) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)rs1; + break; + } +}) diff --git a/riscv/insns/vwmulu_vv.h b/riscv/insns/vwmulu_vv.h new file mode 100644 index 0000000000..8ddbb4b488 --- /dev/null +++ b/riscv/insns/vwmulu_vv.h @@ -0,0 +1,6 @@ +// vwmulu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, uint); +}) diff --git a/riscv/insns/vwmulu_vx.h b/riscv/insns/vwmulu_vx.h new file mode 100644 index 0000000000..1ce77eefdc --- /dev/null +++ b/riscv/insns/vwmulu_vx.h @@ -0,0 +1,6 @@ +// vwmul.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, uint); +}) diff --git a/riscv/insns/vwredsum_vs.h b/riscv/insns/vwredsum_vs.h new file mode 100644 index 
0000000000..c7a87db431 --- /dev/null +++ b/riscv/insns/vwredsum_vs.h @@ -0,0 +1,5 @@ +// vwredsum.vs vd, vs2, vs1 +VI_VV_LOOP_WIDE_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vwredsumu_vs.h b/riscv/insns/vwredsumu_vs.h new file mode 100644 index 0000000000..889a77d310 --- /dev/null +++ b/riscv/insns/vwredsumu_vs.h @@ -0,0 +1,5 @@ +// vwredsum.vs vd, vs2, vs1 +VI_VV_ULOOP_WIDE_REDUCTION +({ + vd_0_res += vs2; +}) diff --git a/riscv/insns/vwsub_vv.h b/riscv/insns/vwsub_vv.h new file mode 100644 index 0000000000..99f9348985 --- /dev/null +++ b/riscv/insns/vwsub_vv.h @@ -0,0 +1,6 @@ +// vwsub.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, int); +}) diff --git a/riscv/insns/vwsub_vx.h b/riscv/insns/vwsub_vx.h new file mode 100644 index 0000000000..affdf62ce0 --- /dev/null +++ b/riscv/insns/vwsub_vx.h @@ -0,0 +1,6 @@ +// vwsub.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, int); +}) diff --git a/riscv/insns/vwsub_wv.h b/riscv/insns/vwsub_wv.h new file mode 100644 index 0000000000..10db7308e2 --- /dev/null +++ b/riscv/insns/vwsub_wv.h @@ -0,0 +1,6 @@ +// vwsub.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, -, int); +}) diff --git a/riscv/insns/vwsub_wx.h b/riscv/insns/vwsub_wx.h new file mode 100644 index 0000000000..f72341ba80 --- /dev/null +++ b/riscv/insns/vwsub_wx.h @@ -0,0 +1,6 @@ +// vwsub.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, -, int); +}) diff --git a/riscv/insns/vwsubu_vv.h b/riscv/insns/vwsubu_vv.h new file mode 100644 index 0000000000..cf68adb9fa --- /dev/null +++ b/riscv/insns/vwsubu_vv.h @@ -0,0 +1,6 @@ +// vwsubu.vv vd, vs2, vs1 +VI_CHECK_DSS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, uint); +}) diff --git a/riscv/insns/vwsubu_vx.h b/riscv/insns/vwsubu_vx.h new file mode 100644 index 0000000000..3e972dd211 --- /dev/null +++ 
b/riscv/insns/vwsubu_vx.h @@ -0,0 +1,6 @@ +// vwsubu.vx vd, vs2, rs1 +VI_CHECK_DSS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, uint); +}) diff --git a/riscv/insns/vwsubu_wv.h b/riscv/insns/vwsubu_wv.h new file mode 100644 index 0000000000..3687c3d237 --- /dev/null +++ b/riscv/insns/vwsubu_wv.h @@ -0,0 +1,6 @@ +// vwsubu.wv vd, vs2, vs1 +VI_CHECK_DDS(true); +VI_VV_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(vs1, -, uint); +}) diff --git a/riscv/insns/vwsubu_wx.h b/riscv/insns/vwsubu_wx.h new file mode 100644 index 0000000000..c7f20edd79 --- /dev/null +++ b/riscv/insns/vwsubu_wx.h @@ -0,0 +1,6 @@ +// vwsubu.wx vd, vs2, rs1 +VI_CHECK_DDS(false); +VI_VX_LOOP_WIDEN +({ + VI_WIDE_WVX_OP(rs1, -, uint); +}) diff --git a/riscv/insns/vxor_vi.h b/riscv/insns/vxor_vi.h new file mode 100644 index 0000000000..b2dcf946dc --- /dev/null +++ b/riscv/insns/vxor_vi.h @@ -0,0 +1,5 @@ +// vxor +VI_VI_LOOP +({ + vd = simm5 ^ vs2; +}) diff --git a/riscv/insns/vxor_vv.h b/riscv/insns/vxor_vv.h new file mode 100644 index 0000000000..c37b6ab729 --- /dev/null +++ b/riscv/insns/vxor_vv.h @@ -0,0 +1,5 @@ +// vxor +VI_VV_LOOP +({ + vd = vs1 ^ vs2; +}) diff --git a/riscv/insns/vxor_vx.h b/riscv/insns/vxor_vx.h new file mode 100644 index 0000000000..8021e0e851 --- /dev/null +++ b/riscv/insns/vxor_vx.h @@ -0,0 +1,5 @@ +// vxor +VI_VX_LOOP +({ + vd = rs1 ^ vs2; +}) diff --git a/riscv/insns/vzext_vf2.h b/riscv/insns/vzext_vf2.h new file mode 100644 index 0000000000..100f2e359a --- /dev/null +++ b/riscv/insns/vzext_vf2.h @@ -0,0 +1 @@ +VI_VV_EXT(2, uint); diff --git a/riscv/insns/vzext_vf4.h b/riscv/insns/vzext_vf4.h new file mode 100644 index 0000000000..6ff920e0bc --- /dev/null +++ b/riscv/insns/vzext_vf4.h @@ -0,0 +1 @@ +VI_VV_EXT(4, uint); diff --git a/riscv/insns/vzext_vf8.h b/riscv/insns/vzext_vf8.h new file mode 100644 index 0000000000..b1762fbf67 --- /dev/null +++ b/riscv/insns/vzext_vf8.h @@ -0,0 +1 @@ +VI_VV_EXT(8, uint); diff --git a/riscv/insns/wfi.h b/riscv/insns/wfi.h 
index 6504b78c60..59ed35bb6f 100644 --- a/riscv/insns/wfi.h +++ b/riscv/insns/wfi.h @@ -1,2 +1,11 @@ -require_privilege(get_field(STATE.mstatus, MSTATUS_TW) ? PRV_M : PRV_S); +if (STATE.v && STATE.prv == PRV_U) { + require_novirt(); +} else if (get_field(STATE.mstatus, MSTATUS_TW)) { + require_privilege(PRV_M); +} else if (STATE.v) { // VS-mode + if (get_field(STATE.hstatus, HSTATUS_VTW)) + require_novirt(); +} else { + require_privilege(PRV_S); +} wfi(); diff --git a/riscv/interactive.cc b/riscv/interactive.cc index c96c71ace7..00e505d896 100644 --- a/riscv/interactive.cc +++ b/riscv/interactive.cc @@ -66,8 +66,10 @@ void sim_t::interactive() funcs["run"] = &sim_t::interactive_run_noisy; funcs["r"] = funcs["run"]; funcs["rs"] = &sim_t::interactive_run_silent; + funcs["vreg"] = &sim_t::interactive_vreg; funcs["reg"] = &sim_t::interactive_reg; funcs["freg"] = &sim_t::interactive_freg; + funcs["fregh"] = &sim_t::interactive_fregh; funcs["fregs"] = &sim_t::interactive_fregs; funcs["fregd"] = &sim_t::interactive_fregd; funcs["pc"] = &sim_t::interactive_pc; @@ -107,7 +109,7 @@ void sim_t::interactive() else fprintf(stderr, "Unknown command %s\n", cmd.c_str()); } - catch(trap_t t) {} + catch(trap_t& t) {} } ctrlc_pressed = false; } @@ -117,8 +119,10 @@ void sim_t::interactive_help(const std::string& cmd, const std::vector [reg] # Display [reg] (all if omitted) in \n" + "fregh # Display half precision in \n" "fregs # Display single precision in \n" "fregd # Display double precision in \n" + "vreg [reg] # Display vector [reg] (all if omitted) in \n" "pc # Show current PC in \n" "mem # Show contents of physical memory\n" "str # Show NUL-terminated C string\n" @@ -218,6 +222,54 @@ freg_t sim_t::get_freg(const std::vector& args) return p->get_state()->FPR[r]; } +void sim_t::interactive_vreg(const std::string& cmd, const std::vector& args) +{ + int rstart = 0; + int rend = NVPR; + if (args.size() >= 2) { + rstart = strtol(args[1].c_str(), NULL, 0); + if (!(rstart >= 0 && 
rstart < NVPR)) { + rstart = 0; + } else { + rend = rstart + 1; + } + } + + // Show all the regs! + processor_t *p = get_core(args[0]); + const int vlen = (int)(p->VU.get_vlen()) >> 3; + const int elen = (int)(p->VU.get_elen()) >> 3; + const int num_elem = vlen/elen; + fprintf(stderr, "VLEN=%d bits; ELEN=%d bits\n", vlen << 3, elen << 3); + + for (int r = rstart; r < rend; ++r) { + fprintf(stderr, "%-4s: ", vr_name[r]); + for (int e = num_elem-1; e >= 0; --e){ + uint64_t val; + switch(elen){ + case 8: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%016" PRIx64 " ", e, val); + break; + case 4: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx32 " ", e, (uint32_t)val); + break; + case 2: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx16 " ", e, (uint16_t)val); + break; + case 1: + val = P.VU.elt(r, e); + fprintf(stderr, "[%d]: 0x%08" PRIx8 " ", e, (uint8_t)val); + break; + } + } + fprintf(stderr, "\n"); + } +} + + void sim_t::interactive_reg(const std::string& cmd, const std::vector& args) { if (args.size() == 1) { @@ -246,6 +298,13 @@ void sim_t::interactive_freg(const std::string& cmd, const std::vector& args) +{ + fpr f; + f.r = freg(f16_to_f32(f16(get_freg(args)))); + fprintf(stderr, "%g\n", isBoxedF32(f.r) ? 
(double)f.s : NAN); +} + void sim_t::interactive_fregs(const std::string& cmd, const std::vector& args) { fpr f; @@ -361,7 +420,7 @@ void sim_t::interactive_until(const std::string& cmd, const std::vector 0) + rti_remaining--; + dm->run_test_idle(); + break; case TEST_LOGIC_RESET: ir = IR_IDCODE; break; @@ -136,7 +145,12 @@ void jtag_dtm_t::capture_dr() dr_length = 32; break; case IR_DBUS: - dr = dmi; + if (rti_remaining > 0 || busy_stuck) { + dr = DMI_OP_STATUS_BUSY; + busy_stuck = true; + } else { + dr = dmi; + } dr_length = abits + 34; break; default: @@ -151,34 +165,37 @@ void jtag_dtm_t::update_dr() { D(fprintf(stderr, "Update DR; IR=0x%x, DR=0x%lx (%d bits)\n", ir, dr, dr_length)); - switch (ir) { - case IR_DBUS: - { - unsigned op = get_field(dr, DMI_OP); - uint32_t data = get_field(dr, DMI_DATA); - unsigned address = get_field(dr, DMI_ADDRESS); - - dmi = dr; - - bool success = true; - if (op == DMI_OP_READ) { - uint32_t value; - if (dm->dmi_read(address, &value)) { - dmi = set_field(dmi, DMI_DATA, value); - } else { - success = false; - } - } else if (op == DMI_OP_WRITE) { - success = dm->dmi_write(address, data); - } - - if (success) { - dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_SUCCESS); - } else { - dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_FAILED); - } - D(fprintf(stderr, "dmi=0x%lx\n", dmi)); + if (ir == IR_DTMCONTROL) { + if (dr & DTMCONTROL_DMIRESET) + busy_stuck = false; + if (dr & DTMCONTROL_DMIHARDRESET) + reset(); + } else if (ir == IR_DBUS && !busy_stuck) { + unsigned op = get_field(dr, DMI_OP); + uint32_t data = get_field(dr, DMI_DATA); + unsigned address = get_field(dr, DMI_ADDRESS); + + dmi = dr; + + bool success = true; + if (op == DMI_OP_READ) { + uint32_t value; + if (dm->dmi_read(address, &value)) { + dmi = set_field(dmi, DMI_DATA, value); + } else { + success = false; } - break; + } else if (op == DMI_OP_WRITE) { + success = dm->dmi_write(address, data); + } + + if (success) { + dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_SUCCESS); + } 
else { + dmi = set_field(dmi, DMI_OP, DMI_OP_STATUS_FAILED); + } + D(fprintf(stderr, "dmi=0x%lx\n", dmi)); + + rti_remaining = required_rti_cycles; } } diff --git a/riscv/jtag_dtm.h b/riscv/jtag_dtm.h index 063e3f40b3..3482b8a06b 100644 --- a/riscv/jtag_dtm.h +++ b/riscv/jtag_dtm.h @@ -29,7 +29,7 @@ class jtag_dtm_t static const unsigned idcode = 0xdeadbeef; public: - jtag_dtm_t(debug_module_t *dm); + jtag_dtm_t(debug_module_t *dm, unsigned required_rti_cycles); void reset(); void set_pins(bool tck, bool tms, bool tdi); @@ -40,6 +40,9 @@ class jtag_dtm_t private: debug_module_t *dm; + // The number of Run-Test/Idle cycles required before a DMI access is + // complete. + unsigned required_rti_cycles; bool _tck, _tms, _tdi, _tdo; uint32_t ir; const unsigned ir_length = 5; @@ -51,6 +54,10 @@ class jtag_dtm_t const unsigned abits = 6; uint32_t dtmcontrol; uint64_t dmi; + // Number of Run-Test/Idle cycles needed before we call this access + // complete. + unsigned rti_remaining; + bool busy_stuck; jtag_state_t _state; diff --git a/riscv/log_file.h b/riscv/log_file.h new file mode 100644 index 0000000000..d039859dc6 --- /dev/null +++ b/riscv/log_file.h @@ -0,0 +1,37 @@ +// See LICENSE for license details. +#ifndef _RISCV_LOGFILE_H +#define _RISCV_LOGFILE_H + +#include +#include +#include +#include + +// Header-only class wrapping a log file. When constructed with an +// actual path, it opens the named file for writing. When constructed +// with the null path, it wraps stderr. +class log_file_t +{ +public: + log_file_t(const char *path) + : wrapped_file (nullptr, &fclose) + { + if (!path) + return; + + wrapped_file.reset(fopen(path, "w")); + if (! wrapped_file) { + std::ostringstream oss; + oss << "Failed to open log file at `" << path << "': " + << strerror (errno); + throw std::runtime_error(oss.str()); + } + } + + FILE *get() { return wrapped_file ? 
wrapped_file.get() : stderr; } + +private: + std::unique_ptr wrapped_file; +}; + +#endif diff --git a/riscv/mmio_plugin.h b/riscv/mmio_plugin.h new file mode 100644 index 0000000000..f14470bf38 --- /dev/null +++ b/riscv/mmio_plugin.h @@ -0,0 +1,91 @@ +#ifndef _RISCV_MMIO_PLUGIN_H +#define _RISCV_MMIO_PLUGIN_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +typedef uint64_t reg_t; + +typedef struct { + // Allocate user data for an instance of the plugin. The parameter is a simple + // c-string containing arguments used to construct the plugin. It returns a + // void* to the allocated data. + void* (*alloc)(const char*); + + // Load a memory address of the MMIO plugin. The parameters are the user_data + // (void*), memory offset (reg_t), number of bytes to load (size_t), and the + // buffer into which the loaded data should be written (uint8_t*). Return true + // if the load is successful and false otherwise. + bool (*load)(void*, reg_t, size_t, uint8_t*); + + // Store some bytes to a memory address of the MMIO plugin. The parameters are + // the user_data (void*), memory offset (reg_t), number of bytes to store + // (size_t), and the buffer containing the data to be stored (const uint8_t*). + // Return true if the store is successful and false otherwise. + bool (*store)(void*, reg_t, size_t, const uint8_t*); + + // Deallocate the data allocated during the call to alloc. The parameter is a + // pointer to the user data allocated during the call to alloc. + void (*dealloc)(void*); +} mmio_plugin_t; + +// Register an mmio plugin with the application. This should be called by +// plugins as part of their loading process. +extern void register_mmio_plugin(const char* name_cstr, + const mmio_plugin_t* mmio_plugin); + +#ifdef __cplusplus +} + +#include + +// Wrapper around the C plugin API that makes registering a C++ class with +// correctly formed constructor, load, and store functions easier. 
The template +// type should be the type that implements the MMIO plugin interface. Simply +// make a global mmio_plugin_registration_t and your plugin should register +// itself with the application when it is loaded because the +// mmio_plugin_registration_t constructor will be called. +template +struct mmio_plugin_registration_t +{ + static void* alloc(const char* args) + { + return reinterpret_cast(new T(std::string(args))); + } + + static bool load(void* self, reg_t addr, size_t len, uint8_t* bytes) + { + return reinterpret_cast(self)->load(addr, len, bytes); + } + + static bool store(void* self, reg_t addr, size_t len, const uint8_t* bytes) + { + return reinterpret_cast(self)->store(addr, len, bytes); + } + + static void dealloc(void* self) + { + delete reinterpret_cast(self); + } + + mmio_plugin_registration_t(const std::string& name) + { + mmio_plugin_t plugin = { + mmio_plugin_registration_t::alloc, + mmio_plugin_registration_t::load, + mmio_plugin_registration_t::store, + mmio_plugin_registration_t::dealloc, + }; + + register_mmio_plugin(name.c_str(), &plugin); + } +}; +#endif // __cplusplus + +#endif diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 3e1fc25552..e8dca6a85d 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -37,39 +37,51 @@ void mmu_t::flush_tlb() static void throw_access_exception(reg_t addr, access_type type) { switch (type) { - case FETCH: throw trap_instruction_access_fault(addr); - case LOAD: throw trap_load_access_fault(addr); - case STORE: throw trap_store_access_fault(addr); + case FETCH: throw trap_instruction_access_fault(addr, 0, 0); + case LOAD: throw trap_load_access_fault(addr, 0, 0); + case STORE: throw trap_store_access_fault(addr, 0, 0); default: abort(); } } -reg_t mmu_t::translate(reg_t addr, reg_t len, access_type type) +reg_t mmu_t::translate(reg_t addr, reg_t len, access_type type, uint32_t xlate_flags) { if (!proc) return addr; + bool mxr = get_field(proc->state.mstatus, MSTATUS_MXR); + bool virt = proc->state.v; reg_t 
mode = proc->state.prv; if (type != FETCH) { - if (!proc->state.dcsr.cause && get_field(proc->state.mstatus, MSTATUS_MPRV)) + if (!proc->state.debug_mode && get_field(proc->state.mstatus, MSTATUS_MPRV)) { mode = get_field(proc->state.mstatus, MSTATUS_MPP); + if (get_field(proc->state.mstatus, MSTATUS_MPV)) + virt = true; + } + if (!proc->state.debug_mode && (xlate_flags & RISCV_XLATE_VIRT)) { + virt = true; + mode = get_field(proc->state.hstatus, HSTATUS_SPVP); + if (type == LOAD && (xlate_flags & RISCV_XLATE_VIRT_MXR)) { + mxr = true; + } + } } - reg_t paddr = walk(addr, type, mode) | (addr & (PGSIZE-1)); - if (!pmp_ok(paddr, type, mode) || !pmp_homogeneous(paddr, len)) + reg_t paddr = walk(addr, type, mode, virt, mxr) | (addr & (PGSIZE-1)); + if (!pmp_ok(paddr, len, type, mode)) throw_access_exception(addr, type); return paddr; } tlb_entry_t mmu_t::fetch_slow_path(reg_t vaddr) { - reg_t paddr = translate(vaddr, sizeof(fetch_temp), FETCH); + reg_t paddr = translate(vaddr, sizeof(fetch_temp), FETCH, 0); if (auto host_addr = sim->addr_to_mem(paddr)) { return refill_tlb(vaddr, paddr, host_addr, FETCH); } else { - if (!sim->mmio_load(paddr, sizeof fetch_temp, (uint8_t*)&fetch_temp)) - throw trap_instruction_access_fault(vaddr); + if (!mmio_load(paddr, sizeof fetch_temp, (uint8_t*)&fetch_temp)) + throw trap_instruction_access_fault(vaddr, 0, 0); tlb_entry_t entry = {(char*)&fetch_temp - vaddr, paddr - vaddr}; return entry; } @@ -101,9 +113,34 @@ reg_t reg_from_bytes(size_t len, const uint8_t* bytes) abort(); } -void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) +bool mmu_t::mmio_ok(reg_t addr, access_type type) +{ + // Disallow access to debug region when not in debug mode + if (addr >= DEBUG_START && addr <= DEBUG_END && proc && !proc->state.debug_mode) + return false; + + return true; +} + +bool mmu_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) +{ + if (!mmio_ok(addr, LOAD)) + return false; + + return sim->mmio_load(addr, len, bytes); +} + +bool 
mmu_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) +{ + if (!mmio_ok(addr, STORE)) + return false; + + return sim->mmio_store(addr, len, bytes); +} + +void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes, uint32_t xlate_flags) { - reg_t paddr = translate(addr, len, LOAD); + reg_t paddr = translate(addr, len, LOAD, xlate_flags); if (auto host_addr = sim->addr_to_mem(paddr)) { memcpy(bytes, host_addr, len); @@ -111,8 +148,8 @@ void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) tracer.trace(paddr, len, LOAD); else refill_tlb(addr, paddr, host_addr, LOAD); - } else if (!sim->mmio_load(paddr, len, bytes)) { - throw trap_load_access_fault(addr); + } else if (!mmio_load(paddr, len, bytes)) { + throw trap_load_access_fault(addr, 0, 0); } if (!matched_trigger) { @@ -123,9 +160,9 @@ void mmu_t::load_slow_path(reg_t addr, reg_t len, uint8_t* bytes) } } -void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes) +void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes, uint32_t xlate_flags) { - reg_t paddr = translate(addr, len, STORE); + reg_t paddr = translate(addr, len, STORE, xlate_flags); if (!matched_trigger) { reg_t data = reg_from_bytes(len, bytes); @@ -140,8 +177,8 @@ void mmu_t::store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes) tracer.trace(paddr, len, STORE); else refill_tlb(addr, paddr, host_addr, STORE); - } else if (!sim->mmio_store(paddr, len, bytes)) { - throw trap_store_access_fault(addr); + } else if (!mmio_store(paddr, len, bytes)) { + throw trap_store_access_fault(addr, 0, 0); } } @@ -173,26 +210,40 @@ tlb_entry_t mmu_t::refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_ return entry; } -reg_t mmu_t::pmp_ok(reg_t addr, access_type type, reg_t mode) +reg_t mmu_t::pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode) { - if (!proc) + if (!proc || proc->n_pmp == 0) return true; reg_t base = 0; - for (size_t i = 0; i < proc->state.n_pmp; i++) { - reg_t tor = 
proc->state.pmpaddr[i] << PMP_SHIFT; + for (size_t i = 0; i < proc->n_pmp; i++) { + reg_t tor = (proc->state.pmpaddr[i] & proc->pmp_tor_mask()) << PMP_SHIFT; uint8_t cfg = proc->state.pmpcfg[i]; if (cfg & PMP_A) { bool is_tor = (cfg & PMP_A) == PMP_TOR; bool is_na4 = (cfg & PMP_A) == PMP_NA4; - reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4); + reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4) | ~proc->pmp_tor_mask(); mask = ~(mask & ~(mask + 1)) << PMP_SHIFT; - bool napot_match = ((addr ^ tor) & mask) == 0; - bool tor_match = base <= addr && addr < tor; - if (is_tor ? tor_match : napot_match) { + // Check each 4-byte sector of the access + bool any_match = false; + bool all_match = true; + for (reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) { + reg_t cur_addr = addr + offset; + bool napot_match = ((cur_addr ^ tor) & mask) == 0; + bool tor_match = base <= cur_addr && cur_addr < tor; + bool match = is_tor ? tor_match : napot_match; + any_match |= match; + all_match &= match; + } + + if (any_match) { + // If the PMP matches only a strict subset of the access, fail it + if (!all_match) + return false; + return (mode == PRV_M && !(cfg & PMP_L)) || (type == LOAD && (cfg & PMP_R)) || @@ -216,8 +267,8 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) return true; reg_t base = 0; - for (size_t i = 0; i < proc->state.n_pmp; i++) { - reg_t tor = proc->state.pmpaddr[i] << PMP_SHIFT; + for (size_t i = 0; i < proc->n_pmp; i++) { + reg_t tor = (proc->state.pmpaddr[i] & proc->pmp_tor_mask()) << PMP_SHIFT; uint8_t cfg = proc->state.pmpcfg[i]; if (cfg & PMP_A) { @@ -231,7 +282,7 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) bool tor_homogeneous = ends_before_lower || begins_after_upper || (begins_after_lower && ends_before_upper); - reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4); + reg_t mask = (proc->state.pmpaddr[i] << 1) | (!is_na4) | ~proc->pmp_tor_mask(); mask = ~(mask & ~(mask + 1)) << PMP_SHIFT; bool mask_homogeneous = ~(mask << 
1) & len; bool napot_homogeneous = mask_homogeneous || ((addr ^ tor) / len) != 0; @@ -246,15 +297,82 @@ reg_t mmu_t::pmp_homogeneous(reg_t addr, reg_t len) return true; } -reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) +reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, bool virt, bool mxr) +{ + if (!virt) + return gpa; + + vm_info vm = decode_vm_info(proc->max_xlen, true, 0, proc->get_state()->hgatp); + if (vm.levels == 0) + return gpa; + + reg_t base = vm.ptbase; + for (int i = vm.levels - 1; i >= 0; i--) { + int ptshift = i * vm.idxbits; + int idxbits = (i == (vm.levels - 1)) ? vm.idxbits + vm.widenbits : vm.idxbits; + reg_t idx = (gpa >> (PGSHIFT + ptshift)) & ((reg_t(1) << idxbits) - 1); + + // check that physical address of PTE is legal + auto pte_paddr = base + idx * vm.ptesize; + auto ppte = sim->addr_to_mem(pte_paddr); + if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S)) { + throw_access_exception(gva, type); + } + + reg_t pte = vm.ptesize == 4 ? from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte); + reg_t ppn = pte >> PTE_PPN_SHIFT; + + if (PTE_TABLE(pte)) { // next level of page table + base = ppn << PGSHIFT; + } else if (!(pte & PTE_V) || (!(pte & PTE_R) && (pte & PTE_W))) { + break; + } else if (!(pte & PTE_U)) { + break; + } else if (type == FETCH ? !(pte & PTE_X) : + type == LOAD ? !(pte & PTE_R) && !(mxr && (pte & PTE_X)) : + !((pte & PTE_R) && (pte & PTE_W))) { + break; + } else if ((ppn & ((reg_t(1) << ptshift) - 1)) != 0) { + break; + } else { + reg_t ad = PTE_A | ((type == STORE) * PTE_D); +#ifdef RISCV_ENABLE_DIRTY + // set accessed and possibly dirty bits. + if ((pte & ad) != ad) { + if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S)) + throw_access_exception(gva, type); + *(uint32_t*)ppte |= to_le((uint32_t)ad); + } +#else + // take exception if access or possibly dirty bit is not set. 
+ if ((pte & ad) != ad) + break; +#endif + reg_t vpn = gpa >> PGSHIFT; + reg_t page_mask = (reg_t(1) << PGSHIFT) - 1; + reg_t page_base = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; + return page_base | (gpa & page_mask); + } + } + + switch (type) { + case FETCH: throw trap_instruction_guest_page_fault(gva, gpa >> 2, 0); + case LOAD: throw trap_load_guest_page_fault(gva, gpa >> 2, 0); + case STORE: throw trap_store_guest_page_fault(gva, gpa >> 2, 0); + default: abort(); + } +} + +reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode, bool virt, bool mxr) { - vm_info vm = decode_vm_info(proc->max_xlen, mode, proc->get_state()->satp); + reg_t page_mask = (reg_t(1) << PGSHIFT) - 1; + reg_t satp = (virt) ? proc->get_state()->vsatp : proc->get_state()->satp; + vm_info vm = decode_vm_info(proc->max_xlen, false, mode, satp); if (vm.levels == 0) - return addr & ((reg_t(2) << (proc->xlen-1))-1); // zero-extend from xlen + return s2xlate(addr, addr & ((reg_t(2) << (proc->xlen-1))-1), type, virt, mxr) & ~page_mask; // zero-extend from xlen bool s_mode = mode == PRV_S; bool sum = get_field(proc->state.mstatus, MSTATUS_SUM); - bool mxr = get_field(proc->state.mstatus, MSTATUS_MXR); // verify bits xlen-1:va_bits-1 are all equal int va_bits = PGSHIFT + vm.levels * vm.idxbits; @@ -269,12 +387,12 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) reg_t idx = (addr >> (PGSHIFT + ptshift)) & ((1 << vm.idxbits) - 1); // check that physical address of PTE is legal - auto pte_paddr = base + idx * vm.ptesize; + auto pte_paddr = s2xlate(addr, base + idx * vm.ptesize, LOAD, virt, false); auto ppte = sim->addr_to_mem(pte_paddr); - if (!ppte || !pmp_ok(pte_paddr, LOAD, PRV_S)) + if (!ppte || !pmp_ok(pte_paddr, vm.ptesize, LOAD, PRV_S)) throw_access_exception(addr, type); - reg_t pte = vm.ptesize == 4 ? *(uint32_t*)ppte : *(uint64_t*)ppte; + reg_t pte = vm.ptesize == 4 ? 
from_le(*(uint32_t*)ppte) : from_le(*(uint64_t*)ppte); reg_t ppn = pte >> PTE_PPN_SHIFT; if (PTE_TABLE(pte)) { // next level of page table @@ -294,9 +412,9 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) #ifdef RISCV_ENABLE_DIRTY // set accessed and possibly dirty bits. if ((pte & ad) != ad) { - if (!pmp_ok(pte_paddr, STORE, PRV_S)) + if (!pmp_ok(pte_paddr, vm.ptesize, STORE, PRV_S)) throw_access_exception(addr, type); - *(uint32_t*)ppte |= ad; + *(uint32_t*)ppte |= to_le((uint32_t)ad); } #else // take exception if access or possibly dirty bit is not set. @@ -305,15 +423,16 @@ reg_t mmu_t::walk(reg_t addr, access_type type, reg_t mode) #endif // for superpage mappings, make a fake leaf PTE for the TLB's benefit. reg_t vpn = addr >> PGSHIFT; - reg_t value = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; - return value; + reg_t page_base = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT; + reg_t phys = page_base | (addr & page_mask); + return s2xlate(addr, phys, type, virt, mxr) & ~page_mask; } } switch (type) { - case FETCH: throw trap_instruction_page_fault(addr); - case LOAD: throw trap_load_page_fault(addr); - case STORE: throw trap_store_page_fault(addr); + case FETCH: throw trap_instruction_page_fault(addr, 0, 0); + case LOAD: throw trap_load_page_fault(addr, 0, 0); + case STORE: throw trap_store_page_fault(addr, 0, 0); default: abort(); } } diff --git a/riscv/mmu.h b/riscv/mmu.h index 7617367436..990f137287 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -10,6 +10,7 @@ #include "simif.h" #include "processor.h" #include "memtracer.h" +#include "byteorder.h" #include #include @@ -17,6 +18,7 @@ #define PGSHIFT 12 const reg_t PGSIZE = 1 << PGSHIFT; const reg_t PGMASK = ~(PGSIZE-1); +#define MAX_PADDR_BITS 56 // imposed by Sv39 / Sv48 struct insn_fetch_t { @@ -64,7 +66,7 @@ class mmu_t res += (reg_t)load_uint8(addr + i) << (i * 8); return res; #else - throw trap_load_address_misaligned(addr); + throw trap_load_address_misaligned(addr, 
0, 0); #endif } @@ -74,80 +76,131 @@ class mmu_t for (size_t i = 0; i < size; i++) store_uint8(addr + i, data >> (i * 8)); #else - throw trap_store_address_misaligned(addr); + throw trap_store_address_misaligned(addr, 0, 0); #endif } +#ifndef RISCV_ENABLE_COMMITLOG +# define READ_MEM(addr, size) ({}) +#else +# define READ_MEM(addr, size) \ + proc->state.log_mem_read.push_back(std::make_tuple(addr, 0, size)); +#endif + +#define RISCV_XLATE_VIRT (1U << 0) +#define RISCV_XLATE_VIRT_MXR (1U << 1) + // template for functions that load an aligned value from memory - #define load_func(type) \ - inline type##_t load_##type(reg_t addr) { \ + #define load_func(type, prefix, xlate_flags) \ + inline type##_t prefix##_##type(reg_t addr) { \ + if (xlate_flags) \ + flush_tlb(); \ if (unlikely(addr & (sizeof(type##_t)-1))) \ return misaligned_load(addr, sizeof(type##_t)); \ reg_t vpn = addr >> PGSHIFT; \ - if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) \ - return *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \ + size_t size = sizeof(type##_t); \ + if (likely(tlb_load_tag[vpn % TLB_ENTRIES] == vpn)) { \ + if (proc) READ_MEM(addr, size); \ + return from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \ + } \ if (unlikely(tlb_load_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \ - type##_t data = *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); \ + type##_t data = from_le(*(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr)); \ if (!matched_trigger) { \ matched_trigger = trigger_exception(OPERATION_LOAD, addr, data); \ if (matched_trigger) \ throw *matched_trigger; \ } \ + if (proc) READ_MEM(addr, size); \ return data; \ } \ type##_t res; \ - load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res); \ - return res; \ + load_slow_path(addr, sizeof(type##_t), (uint8_t*)&res, (xlate_flags)); \ + if (proc) READ_MEM(addr, size); \ + if (xlate_flags) \ + flush_tlb(); \ + return from_le(res); \ } // load value from memory 
at aligned address; zero extend to register width - load_func(uint8) - load_func(uint16) - load_func(uint32) - load_func(uint64) + load_func(uint8, load, 0) + load_func(uint16, load, 0) + load_func(uint32, load, 0) + load_func(uint64, load, 0) + + // load value from guest memory at aligned address; zero extend to register width + load_func(uint8, guest_load, RISCV_XLATE_VIRT) + load_func(uint16, guest_load, RISCV_XLATE_VIRT) + load_func(uint32, guest_load, RISCV_XLATE_VIRT) + load_func(uint64, guest_load, RISCV_XLATE_VIRT) + load_func(uint16, guest_load_x, RISCV_XLATE_VIRT|RISCV_XLATE_VIRT_MXR) + load_func(uint32, guest_load_x, RISCV_XLATE_VIRT|RISCV_XLATE_VIRT_MXR) // load value from memory at aligned address; sign extend to register width - load_func(int8) - load_func(int16) - load_func(int32) - load_func(int64) + load_func(int8, load, 0) + load_func(int16, load, 0) + load_func(int32, load, 0) + load_func(int64, load, 0) + + // load value from guest memory at aligned address; sign extend to register width + load_func(int8, guest_load, RISCV_XLATE_VIRT) + load_func(int16, guest_load, RISCV_XLATE_VIRT) + load_func(int32, guest_load, RISCV_XLATE_VIRT) + load_func(int64, guest_load, RISCV_XLATE_VIRT) + +#ifndef RISCV_ENABLE_COMMITLOG +# define WRITE_MEM(addr, value, size) ({}) +#else +# define WRITE_MEM(addr, val, size) \ + proc->state.log_mem_write.push_back(std::make_tuple(addr, val, size)); +#endif // template for functions that store an aligned value to memory - #define store_func(type) \ - void store_##type(reg_t addr, type##_t val) { \ + #define store_func(type, prefix, xlate_flags) \ + void prefix##_##type(reg_t addr, type##_t val) { \ + if (xlate_flags) \ + flush_tlb(); \ if (unlikely(addr & (sizeof(type##_t)-1))) \ return misaligned_store(addr, val, sizeof(type##_t)); \ reg_t vpn = addr >> PGSHIFT; \ - if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) \ - *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \ + size_t size = 
sizeof(type##_t); \ + if (likely(tlb_store_tag[vpn % TLB_ENTRIES] == vpn)) { \ + if (proc) WRITE_MEM(addr, val, size); \ + *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \ + } \ else if (unlikely(tlb_store_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { \ if (!matched_trigger) { \ matched_trigger = trigger_exception(OPERATION_STORE, addr, val); \ if (matched_trigger) \ throw *matched_trigger; \ } \ - *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = val; \ + if (proc) WRITE_MEM(addr, val, size); \ + *(type##_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_le(val); \ } \ - else \ - store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&val); \ - } + else { \ + type##_t le_val = to_le(val); \ + store_slow_path(addr, sizeof(type##_t), (const uint8_t*)&le_val, (xlate_flags)); \ + if (proc) WRITE_MEM(addr, val, size); \ + } \ + if (xlate_flags) \ + flush_tlb(); \ + } // template for functions that perform an atomic memory operation #define amo_func(type) \ template \ type##_t amo_##type(reg_t addr, op f) { \ if (addr & (sizeof(type##_t)-1)) \ - throw trap_store_address_misaligned(addr); \ + throw trap_store_address_misaligned(addr, 0, 0); \ try { \ auto lhs = load_##type(addr); \ store_##type(addr, f(lhs)); \ return lhs; \ } catch (trap_load_page_fault& t) { \ /* AMO faults should be reported as store faults */ \ - throw trap_store_page_fault(t.get_tval()); \ + throw trap_store_page_fault(t.get_tval(), t.get_tval2(), t.get_tinst()); \ } catch (trap_load_access_fault& t) { \ /* AMO faults should be reported as store faults */ \ - throw trap_store_access_fault(t.get_tval()); \ + throw trap_store_access_fault(t.get_tval(), t.get_tval2(), t.get_tinst()); \ } \ } @@ -155,7 +208,7 @@ class mmu_t { #ifndef RISCV_ENABLE_MISALIGNED if (unlikely(addr & (sizeof(float128_t)-1))) - throw trap_store_address_misaligned(addr); + throw trap_store_address_misaligned(addr, 0, 0); #endif store_uint64(addr, val.v[0]); 
store_uint64(addr + 8, val.v[1]); @@ -165,16 +218,22 @@ class mmu_t { #ifndef RISCV_ENABLE_MISALIGNED if (unlikely(addr & (sizeof(float128_t)-1))) - throw trap_load_address_misaligned(addr); + throw trap_load_address_misaligned(addr, 0, 0); #endif return (float128_t){load_uint64(addr), load_uint64(addr + 8)}; } // store value to memory at aligned address - store_func(uint8) - store_func(uint16) - store_func(uint32) - store_func(uint64) + store_func(uint8, store, 0) + store_func(uint16, store, 0) + store_func(uint32, store, 0) + store_func(uint64, store, 0) + + // store value to guest memory at aligned address + store_func(uint8, guest_store, RISCV_XLATE_VIRT) + store_func(uint16, guest_store, RISCV_XLATE_VIRT) + store_func(uint32, guest_store, RISCV_XLATE_VIRT) + store_func(uint64, guest_store, RISCV_XLATE_VIRT) // perform an atomic memory operation at an aligned address amo_func(uint32) @@ -187,20 +246,23 @@ class mmu_t inline void acquire_load_reservation(reg_t vaddr) { - reg_t paddr = translate(vaddr, 1, LOAD); + reg_t paddr = translate(vaddr, 1, LOAD, 0); if (auto host_addr = sim->addr_to_mem(paddr)) load_reservation_address = refill_tlb(vaddr, paddr, host_addr, LOAD).target_offset + vaddr; else - throw trap_load_access_fault(vaddr); // disallow LR to I/O space + throw trap_load_access_fault(vaddr, 0, 0); // disallow LR to I/O space } - inline bool check_load_reservation(reg_t vaddr) + inline bool check_load_reservation(reg_t vaddr, size_t size) { - reg_t paddr = translate(vaddr, 1, STORE); + if (vaddr & (size-1)) + throw trap_store_address_misaligned(vaddr, 0, 0); + + reg_t paddr = translate(vaddr, 1, STORE, 0); if (auto host_addr = sim->addr_to_mem(paddr)) return load_reservation_address == refill_tlb(vaddr, paddr, host_addr, STORE).target_offset + vaddr; else - throw trap_store_access_fault(vaddr); // disallow SC to I/O space + throw trap_store_access_fault(vaddr, 0, 0); // disallow SC to I/O space } static const reg_t ICACHE_ENTRIES = 1024; @@ -213,21 
+275,21 @@ class mmu_t inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry) { auto tlb_entry = translate_insn_addr(addr); - insn_bits_t insn = *(uint16_t*)(tlb_entry.host_offset + addr); + insn_bits_t insn = from_le(*(uint16_t*)(tlb_entry.host_offset + addr)); int length = insn_length(insn); if (likely(length == 4)) { - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } else if (length == 2) { insn = (int16_t)insn; } else if (length == 6) { - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 4) << 32; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } else { static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t"); - insn |= (insn_bits_t)*(const int16_t*)translate_insn_addr_to_host(addr + 6) << 48; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 4) << 32; - insn |= (insn_bits_t)*(const uint16_t*)translate_insn_addr_to_host(addr + 2) << 16; + insn |= (insn_bits_t)from_le(*(const int16_t*)translate_insn_addr_to_host(addr + 6)) << 48; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; } insn_fetch_t fetch = {proc->decode_insn(insn), insn}; @@ -304,14 +366,20 @@ class mmu_t tlb_entry_t refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type); const char* fill_from_mmio(reg_t vaddr, reg_t paddr); + // perform a stage2 translation for a given guest address + reg_t s2xlate(reg_t gva, reg_t gpa, access_type type, bool virt, bool mxr); + // perform a page table walk for a given VA; set 
referenced/dirty bits - reg_t walk(reg_t addr, access_type type, reg_t prv); + reg_t walk(reg_t addr, access_type type, reg_t prv, bool virt, bool mxr); // handle uncommon cases: TLB misses, page faults, MMIO tlb_entry_t fetch_slow_path(reg_t addr); - void load_slow_path(reg_t addr, reg_t len, uint8_t* bytes); - void store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes); - reg_t translate(reg_t addr, reg_t len, access_type type); + void load_slow_path(reg_t addr, reg_t len, uint8_t* bytes, uint32_t xlate_flags); + void store_slow_path(reg_t addr, reg_t len, const uint8_t* bytes, uint32_t xlate_flags); + bool mmio_load(reg_t addr, size_t len, uint8_t* bytes); + bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes); + bool mmio_ok(reg_t addr, access_type type); + reg_t translate(reg_t addr, reg_t len, access_type type, uint32_t xlate_flags); // ITLB lookup inline tlb_entry_t translate_insn_addr(reg_t addr) { @@ -326,9 +394,9 @@ class mmu_t } if (unlikely(tlb_insn_tag[vpn % TLB_ENTRIES] == (vpn | TLB_CHECK_TRIGGERS))) { uint16_t* ptr = (uint16_t*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); - int match = proc->trigger_match(OPERATION_EXECUTE, addr, *ptr); + int match = proc->trigger_match(OPERATION_EXECUTE, addr, from_le(*ptr)); if (match >= 0) { - throw trigger_matched_t(match, OPERATION_EXECUTE, addr, *ptr); + throw trigger_matched_t(match, OPERATION_EXECUTE, addr, from_le(*ptr)); } } return result; @@ -354,7 +422,7 @@ class mmu_t } reg_t pmp_homogeneous(reg_t addr, reg_t len); - reg_t pmp_ok(reg_t addr, access_type type, reg_t mode); + reg_t pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode); bool check_triggers_fetch; bool check_triggers_load; @@ -368,27 +436,41 @@ class mmu_t struct vm_info { int levels; int idxbits; + int widenbits; int ptesize; reg_t ptbase; }; -inline vm_info decode_vm_info(int xlen, reg_t prv, reg_t satp) +inline vm_info decode_vm_info(int xlen, bool stage2, reg_t prv, reg_t satp) { if (prv == PRV_M) { - return 
{0, 0, 0, 0}; - } else if (prv <= PRV_S && xlen == 32) { + return {0, 0, 0, 0, 0}; + } else if (!stage2 && prv <= PRV_S && xlen == 32) { switch (get_field(satp, SATP32_MODE)) { - case SATP_MODE_OFF: return {0, 0, 0, 0}; - case SATP_MODE_SV32: return {2, 10, 4, (satp & SATP32_PPN) << PGSHIFT}; + case SATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case SATP_MODE_SV32: return {2, 10, 0, 4, (satp & SATP32_PPN) << PGSHIFT}; default: abort(); } - } else if (prv <= PRV_S && xlen == 64) { + } else if (!stage2 && prv <= PRV_S && xlen == 64) { switch (get_field(satp, SATP64_MODE)) { - case SATP_MODE_OFF: return {0, 0, 0, 0}; - case SATP_MODE_SV39: return {3, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV48: return {4, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV57: return {5, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; - case SATP_MODE_SV64: return {6, 9, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case SATP_MODE_SV39: return {3, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV48: return {4, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV57: return {5, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + case SATP_MODE_SV64: return {6, 9, 0, 8, (satp & SATP64_PPN) << PGSHIFT}; + default: abort(); + } + } else if (stage2 && xlen == 32) { + switch (get_field(satp, HGATP32_MODE)) { + case HGATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case HGATP_MODE_SV32X4: return {2, 10, 2, 4, (satp & HGATP32_PPN) << PGSHIFT}; + default: abort(); + } + } else if (stage2 && xlen == 64) { + switch (get_field(satp, HGATP64_MODE)) { + case HGATP_MODE_OFF: return {0, 0, 0, 0, 0}; + case HGATP_MODE_SV39X4: return {3, 9, 2, 8, (satp & HGATP64_PPN) << PGSHIFT}; + case HGATP_MODE_SV48X4: return {4, 9, 2, 8, (satp & HGATP64_PPN) << PGSHIFT}; default: abort(); } } else { diff --git a/riscv/mulhi.h b/riscv/mulhi.h deleted file mode 100644 index bb4a484a6d..0000000000 --- a/riscv/mulhi.h +++ /dev/null @@ -1,43 +0,0 @@ -// See LICENSE for 
license details. - -#ifndef _RISCV_MULHI_H -#define _RISCV_MULHI_H - -#include - -inline uint64_t mulhu(uint64_t a, uint64_t b) -{ - uint64_t t; - uint32_t y1, y2, y3; - uint64_t a0 = (uint32_t)a, a1 = a >> 32; - uint64_t b0 = (uint32_t)b, b1 = b >> 32; - - t = a1*b0 + ((a0*b0) >> 32); - y1 = t; - y2 = t >> 32; - - t = a0*b1 + y1; - y1 = t; - - t = a1*b1 + y2 + (t >> 32); - y2 = t; - y3 = t >> 32; - - return ((uint64_t)y3 << 32) | y2; -} - -inline int64_t mulh(int64_t a, int64_t b) -{ - int negate = (a < 0) != (b < 0); - uint64_t res = mulhu(a < 0 ? -a : a, b < 0 ? -b : b); - return negate ? ~res + (a * b == 0) : res; -} - -inline int64_t mulhsu(int64_t a, uint64_t b) -{ - int negate = a < 0; - uint64_t res = mulhu(a < 0 ? -a : a, b); - return negate ? ~res + (a * b == 0) : res; -} - -#endif diff --git a/riscv/opcodes.h b/riscv/opcodes.h index 34c089ebb7..065934a238 100644 --- a/riscv/opcodes.h +++ b/riscv/opcodes.h @@ -125,6 +125,11 @@ static uint32_t csrr(unsigned int rd, unsigned int csr) { return (csr << 20) | (rd << 7) | MATCH_CSRRS; } +static uint32_t csrrs(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused)); +static uint32_t csrrs(unsigned int rd, unsigned int rs1, unsigned int csr) { + return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRS; +} + static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) __attribute__ ((unused)); static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) { @@ -177,7 +182,6 @@ static uint32_t fence_i(void) return MATCH_FENCE_I; } -/* static uint32_t lui(unsigned int dest, uint32_t imm) __attribute__ ((unused)); static uint32_t lui(unsigned int dest, uint32_t imm) { @@ -186,6 +190,7 @@ static uint32_t lui(unsigned int dest, uint32_t imm) MATCH_LUI; } +/* static uint32_t csrci(unsigned int csr, uint16_t imm) __attribute__ ((unused)); static uint32_t csrci(unsigned int csr, uint16_t imm) { return (csr << 20) | diff --git a/riscv/processor.cc b/riscv/processor.cc index 
00612f0944..b601f1fbf7 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -1,5 +1,6 @@ // See LICENSE for license details. +#include "arith.h" #include "processor.h" #include "extension.h" #include "common.h" @@ -14,19 +15,29 @@ #include #include #include +#include #include #undef STATE #define STATE state -processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, - bool halt_on_reset) - : debug(false), halt_request(false), sim(sim), ext(NULL), id(id), - halt_on_reset(halt_on_reset), last_pc(1), executions(1) +processor_t::processor_t(const char* isa, const char* priv, const char* varch, + simif_t* sim, uint32_t id, bool halt_on_reset, + FILE* log_file) + : debug(false), halt_request(HR_NONE), sim(sim), ext(NULL), id(id), xlen(0), + histogram_enabled(false), log_commits_enabled(false), + log_file(log_file), halt_on_reset(halt_on_reset), + extension_table(256, false), last_pc(1), executions(1) { + VU.p = this; + + hwLoops.p = this; + parse_isa_string(isa); - register_base_instructions(); + parse_priv_string(priv); + parse_varch_string(varch); + register_base_instructions(); mmu = new mmu_t(sim, this); disassembler = new disassembler_t(max_xlen); @@ -34,6 +45,8 @@ processor_t::processor_t(const char* isa, simif_t* sim, uint32_t id, for (auto disasm_insn : ext->get_disasms()) disassembler->add_insn(disasm_insn); + set_pmp_granularity(1 << PMP_SHIFT); + set_pmp_num(state.max_pmp); reset(); } @@ -52,26 +65,160 @@ processor_t::~processor_t() delete disassembler; } -static void bad_isa_string(const char* isa) +static void bad_option_string(const char *option, const char *value, + const char *msg) { - fprintf(stderr, "error: bad --isa option %s\n", isa); + fprintf(stderr, "error: bad %s option '%s'. 
%s\n", option, value, msg); abort(); } -void processor_t::parse_isa_string(const char* str) +static void bad_isa_string(const char* isa, const char* msg) +{ + bad_option_string("--isa", isa, msg); +} + +static void bad_priv_string(const char* priv) +{ + fprintf(stderr, "error: bad --priv option %s\n", priv); + abort(); +} + +static void bad_varch_string(const char* varch, const char *msg) +{ + bad_option_string("--varch", varch, msg); +} + +static std::string get_string_token(std::string str, const char delimiter, size_t& pos) +{ + size_t _pos = pos; + while (pos < str.length() && str[pos] != delimiter) ++pos; + return str.substr(_pos, pos - _pos); +} + +static int get_int_token(std::string str, const char delimiter, size_t& pos) +{ + size_t _pos = pos; + while (pos < str.length() && str[pos] != delimiter) { + if (!isdigit(str[pos])) + bad_varch_string(str.c_str(), "Unsupported value"); // An integer is expected + ++pos; + } + return (pos == _pos) ? 0 : stoi(str.substr(_pos, pos - _pos)); +} + +static bool check_pow2(int val) { - std::string lowercase, tmp; + return ((val & (val - 1))) == 0; +} + +void processor_t::parse_varch_string(const char* s) +{ + std::string str, tmp; + for (const char *r = s; *r; r++) + str += std::tolower(*r); + + size_t pos = 0; + size_t len = str.length(); + int vlen = 0; + int elen = 0; + int slen = 0; + int vstart_alu = 1; + + while (pos < len) { + std::string attr = get_string_token(str, ':', pos); + + ++pos; + + if (attr == "vlen") + vlen = get_int_token(str, ',', pos); + else if (attr == "slen") + slen = get_int_token(str, ',', pos); + else if (attr == "elen") + elen = get_int_token(str, ',', pos); + else if (attr == "vstartalu") + vstart_alu = get_int_token(str, ',', pos); + else + bad_varch_string(s, "Unsupported token"); + + ++pos; + } + + // The integer should be the power of 2 + if (!check_pow2(vlen) || !check_pow2(elen) || !check_pow2(slen)){ + bad_varch_string(s, "The integer value should be the power of 2"); + } + + if (slen 
== 0) + slen = vlen; + + /* Vector spec requirements. */ + if (vlen < elen) + bad_varch_string(s, "vlen must be >= elen"); + if ((unsigned) elen < std::max(max_xlen, get_flen())) + bad_varch_string(s, "elen must be >= max(xlen, flen)"); + if (vlen != slen) + bad_varch_string(s, "vlen must be == slen for current limitation"); + + /* spike requirements. */ + if (vlen > 4096) + bad_varch_string(s, "vlen must be <= 4096"); + + VU.VLEN = vlen; + VU.ELEN = elen; + VU.vlenb = vlen / 8; + VU.vstart_alu = vstart_alu; +} + +static std::string strtolower(const char* str) +{ + std::string res; for (const char *r = str; *r; r++) - lowercase += std::tolower(*r); + res += std::tolower(*r); + return res; +} + +void processor_t::parse_priv_string(const char* str) +{ + std::string lowercase = strtolower(str); + bool user = false, supervisor = false; + + if (lowercase == "m") + ; + else if (lowercase == "mu") + user = true; + else if (lowercase == "msu") + user = supervisor = true; + else + bad_priv_string(str); + + if (user) { + max_isa |= reg_t(user) << ('u' - 'a'); + extension_table['U'] = true; + } + + if (supervisor) { + max_isa |= reg_t(supervisor) << ('s' - 'a'); + extension_table['S'] = true; + } +} + +void processor_t::parse_isa_string(const char* str) +{ + std::string lowercase = strtolower(str), tmp; + char error_msg[256]; const char* p = lowercase.c_str(); - const char* all_subsets = "imafdqc"; + const char* all_subsets = "imafdqch" +#ifdef __SIZEOF_INT128__ + "v" +#endif + ""; max_xlen = 64; - state.misa = reg_t(2) << 62; + max_isa = reg_t(2) << 62; if (strncmp(p, "rv32", 4) == 0) - max_xlen = 32, state.misa = reg_t(1) << 30, p += 4; + max_xlen = 32, max_isa = reg_t(1) << 30, p += 4; else if (strncmp(p, "rv64", 4) == 0) p += 4; else if (strncmp(p, "rv", 2) == 0) @@ -82,55 +229,196 @@ void processor_t::parse_isa_string(const char* str) } else if (*p == 'g') { // treat "G" as "IMAFD" tmp = std::string("imafd") + (p+1); p = &tmp[0]; - } else if (*p != 'i') { - 
bad_isa_string(str); } isa_string = "rv" + std::to_string(max_xlen) + p; - state.misa |= 1L << ('s' - 'a'); // advertise support for supervisor mode - state.misa |= 1L << ('u' - 'a'); // advertise support for user mode while (*p) { - state.misa |= 1L << (*p - 'a'); + if (islower(*p)) { + max_isa |= 1L << (*p - 'a'); + extension_table[toupper(*p)] = true; + + if (strchr(all_subsets, *p)) { + p++; + } else if (*p == 'x') { + const char* ext = p + 1, *end = ext; + while (islower(*end) || *end == '_') + end++; + + auto ext_str = std::string(ext, end - ext); + if (ext_str != "dummy") + register_extension(find_extension(ext_str.c_str())()); + + p = end; + } else { + sprintf(error_msg, "unsupported extension '%c'", *p); + bad_isa_string(str, error_msg); + } + } else if (*p == '_') { + const char* ext = p + 1, *end = ext; + if (*ext == 'x') { + p++; + continue; + } - if (auto next = strchr(all_subsets, *p)) { - all_subsets = next + 1; - p++; - } else if (*p == 'x') { - const char* ext = p+1, *end = ext; while (islower(*end)) end++; - register_extension(find_extension(std::string(ext, end - ext).c_str())()); + + auto ext_str = std::string(ext, end - ext); + if (ext_str == "zfh") { + extension_table[EXT_ZFH] = true; + } else { + sprintf(error_msg, "unsupported extension '%s'", ext_str.c_str()); + bad_isa_string(str, error_msg); + } + p = end; } else { - bad_isa_string(str); + sprintf(error_msg, "can't parse '%c(%d)'", *p, *p); + bad_isa_string(str, error_msg); } } - if (supports_extension('D') && !supports_extension('F')) - bad_isa_string(str); + state.misa = max_isa; - if (supports_extension('Q') && !supports_extension('D')) - bad_isa_string(str); + if (!supports_extension('I')) + bad_isa_string(str, "'I' extension is required"); + + if (supports_extension(EXT_ZFH) && !supports_extension('F')) + bad_isa_string(str, "'Zfh' extension requires 'F'"); - if (supports_extension('Q') && max_xlen < 64) - bad_isa_string(str); + if (supports_extension('D') && 
!supports_extension('F')) + bad_isa_string(str, "'D' extension requires 'F'"); - max_isa = state.misa; + if (supports_extension('Q') && !supports_extension('D')) + bad_isa_string(str, "'Q' extension requires 'D'"); } void state_t::reset(reg_t max_isa) { - memset(this, 0, sizeof(*this)); - misa = max_isa; - prv = PRV_M; pc = DEFAULT_RSTVEC; + XPR.reset(); + FPR.reset(); + + prv = PRV_M; + v = false; + misa = max_isa; + mstatus = 0; + mepc = 0; + mtval = 0; + mscratch = 0; + mtvec = 0; + mcause = 0; + minstret = 0; + mie = 0; + mip = 0; + medeleg = 0; + mideleg = 0; + mcounteren = 0; + scounteren = 0; + sepc = 0; + stval = 0; + sscratch = 0; + stvec = 0; + satp = 0; + scause = 0; + mtval2 = 0; + mtinst = 0; + hstatus = 0; + hideleg = 0; + hedeleg = 0; + hcounteren = 0; + htval = 0; + htinst = 0; + hgatp = 0; + vsstatus = 0; + vstvec = 0; + vsscratch = 0; + vsepc = 0; + vscause = 0; + vstval = 0; + vsatp = 0; + + dpc = 0; + dscratch0 = 0; + dscratch1 = 0; + memset(&this->dcsr, 0, sizeof(this->dcsr)); + tselect = 0; - for (unsigned int i = 0; i < num_triggers; i++) - mcontrol[i].type = 2; + memset(this->mcontrol, 0, sizeof(this->mcontrol)); + for (auto &item : mcontrol) + item.type = 2; + + memset(this->tdata2, 0, sizeof(this->tdata2)); + debug_mode = false; + single_step = STEP_NONE; + + memset(this->pmpcfg, 0, sizeof(this->pmpcfg)); + memset(this->pmpaddr, 0, sizeof(this->pmpaddr)); + + fflags = 0; + frm = 0; + serialized = false; + +#ifdef RISCV_ENABLE_COMMITLOG + log_reg_write.clear(); + log_mem_read.clear(); + log_mem_write.clear(); + last_inst_priv = 0; + last_inst_xlen = 0; + last_inst_flen = 0; +#endif +} + +void processor_t::vectorUnit_t::reset(){ + free(reg_file); + VLEN = get_vlen(); + ELEN = get_elen(); + reg_file = malloc(NVPR * vlenb); + memset(reg_file, 0, NVPR * vlenb); - pmpcfg[0] = PMP_R | PMP_W | PMP_X | PMP_NAPOT; - pmpaddr[0] = ~reg_t(0); + vtype = 0; + set_vl(0, 0, 0, -1); // default to illegal configuration +} + +reg_t 
processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newType){ + int new_vlmul = 0; + if (vtype != newType){ + vtype = newType; + vsew = 1 << (extract64(newType, 3, 3) + 3); + new_vlmul = int8_t(extract64(newType, 0, 3) << 5) >> 5; + vflmul = new_vlmul >= 0 ? 1 << new_vlmul : 1.0 / (1 << -new_vlmul); + vlmax = (VLEN/vsew) * vflmul; + vta = extract64(newType, 6, 1); + vma = extract64(newType, 7, 1); + vediv = 1 << extract64(newType, 8, 2); + + vill = !(vflmul >= 0.125 && vflmul <= 8) + || vsew > ELEN + || vflmul < ((float)vsew / ELEN) + || vediv != 1 + || (newType >> 8) != 0; + + if (vill) { + vlmax = 0; + vtype = UINT64_MAX << (p->get_xlen() - 1); + } + } + + // set vl + if (vlmax == 0) { + vl = 0; + } else if (rd == 0 && rs1 == 0) { + vl = vl > vlmax ? vlmax : vl; + } else if (rd != 0 && rs1 == 0) { + vl = vlmax; + } else if (rs1 != 0) { + vl = reqVL > vlmax ? vlmax : reqVL; + } + + vstart = 0; + setvl_count++; + return vl; } void processor_t::set_debug(bool value) @@ -146,17 +434,36 @@ void processor_t::set_histogram(bool value) #ifndef RISCV_ENABLE_HISTOGRAM if (value) { fprintf(stderr, "PC Histogram support has not been properly enabled;"); - fprintf(stderr, " please re-build the riscv-isa-run project using \"configure --enable-histogram\".\n"); + fprintf(stderr, " please re-build the riscv-isa-sim project using \"configure --enable-histogram\".\n"); + abort(); } #endif } +#ifdef RISCV_ENABLE_COMMITLOG +void processor_t::enable_log_commits() +{ + log_commits_enabled = true; +} +#endif + void processor_t::reset() { state.reset(max_isa); + + state.mideleg = supports_extension('H') ? MIDELEG_FORCED_MASK : 0; + state.dcsr.halt = halt_on_reset; halt_on_reset = false; set_csr(CSR_MSTATUS, state.mstatus); + VU.reset(); + + if (n_pmp > 0) { + // For backwards compatibility with software that is unaware of PMP, + // initialize PMP to permit unprivileged access to all of memory. 
+ set_csr(CSR_PMPADDR0, ~reg_t(0)); + set_csr(CSR_PMPCFG0, PMP_R | PMP_W | PMP_X | PMP_NAPOT); + } if (ext) ext->reset(); // reset the extension @@ -175,31 +482,79 @@ static int ctz(reg_t val) return res; } +void processor_t::set_pmp_num(reg_t n) +{ + // check the number of pmp is in a reasonable range + if (n > state.max_pmp) { + fprintf(stderr, "error: bad number of pmp regions: '%ld' from the dtb\n", (unsigned long)n); + abort(); + } + n_pmp = n; +} + +void processor_t::set_pmp_granularity(reg_t gran) { + // check the pmp granularity is set from dtb(!=0) and is power of 2 + if (gran < (1 << PMP_SHIFT) || (gran & (gran - 1)) != 0) { + fprintf(stderr, "error: bad pmp granularity '%ld' from the dtb\n", (unsigned long)gran); + abort(); + } + + lg_pmp_granularity = ctz(gran); +} + void processor_t::take_interrupt(reg_t pending_interrupts) { - reg_t mie = get_field(state.mstatus, MSTATUS_MIE); - reg_t m_enabled = state.prv < PRV_M || (state.prv == PRV_M && mie); - reg_t enabled_interrupts = pending_interrupts & ~state.mideleg & -m_enabled; + reg_t enabled_interrupts, deleg, status, mie, m_enabled; + reg_t hsie, hs_enabled, vsie, vs_enabled; - reg_t sie = get_field(state.mstatus, MSTATUS_SIE); - reg_t s_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie); - // M-ints have highest priority; consider S-ints only if no M-ints pending - if (enabled_interrupts == 0) - enabled_interrupts = pending_interrupts & state.mideleg & -s_enabled; + // Do nothing if no pending interrupts + if (!pending_interrupts) { + return; + } - if (state.dcsr.cause == 0 && enabled_interrupts) { + // M-ints have higher priority over HS-ints and VS-ints + mie = get_field(state.mstatus, MSTATUS_MIE); + m_enabled = state.prv < PRV_M || (state.prv == PRV_M && mie); + enabled_interrupts = pending_interrupts & ~state.mideleg & -m_enabled; + if (enabled_interrupts == 0) { + // HS-ints have higher priority over VS-ints + deleg = state.mideleg & ~MIP_VS_MASK; + status = (state.v) ? 
state.vsstatus : state.mstatus; + hsie = get_field(status, MSTATUS_SIE); + hs_enabled = state.prv < PRV_S || (state.prv == PRV_S && hsie); + enabled_interrupts = pending_interrupts & deleg & -hs_enabled; + if (state.v && enabled_interrupts == 0) { + // VS-ints have least priority and can only be taken with virt enabled + deleg = state.mideleg & state.hideleg; + vsie = get_field(state.mstatus, MSTATUS_SIE); + vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && vsie); + enabled_interrupts = pending_interrupts & deleg & -vs_enabled; + } + } + + if (!state.debug_mode && enabled_interrupts) { // nonstandard interrupts have highest priority if (enabled_interrupts >> IRQ_M_EXT) enabled_interrupts = enabled_interrupts >> IRQ_M_EXT << IRQ_M_EXT; - // external interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MEIP | MIP_SEIP)) - enabled_interrupts = enabled_interrupts & (MIP_MEIP | MIP_SEIP); - // software interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MSIP | MIP_SSIP)) - enabled_interrupts = enabled_interrupts & (MIP_MSIP | MIP_SSIP); - // timer interrupts have next-highest priority - else if (enabled_interrupts & (MIP_MTIP | MIP_STIP)) - enabled_interrupts = enabled_interrupts & (MIP_MTIP | MIP_STIP); + // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI + else if (enabled_interrupts & MIP_MEIP) + enabled_interrupts = MIP_MEIP; + else if (enabled_interrupts & MIP_MSIP) + enabled_interrupts = MIP_MSIP; + else if (enabled_interrupts & MIP_MTIP) + enabled_interrupts = MIP_MTIP; + else if (enabled_interrupts & MIP_SEIP) + enabled_interrupts = MIP_SEIP; + else if (enabled_interrupts & MIP_SSIP) + enabled_interrupts = MIP_SSIP; + else if (enabled_interrupts & MIP_STIP) + enabled_interrupts = MIP_STIP; + else if (enabled_interrupts & MIP_VSEIP) + enabled_interrupts = MIP_VSEIP; + else if (enabled_interrupts & MIP_VSSIP) + enabled_interrupts = MIP_VSSIP; + else if (enabled_interrupts & MIP_VSTIP) + 
enabled_interrupts = MIP_VSTIP; else abort(); @@ -223,7 +578,7 @@ reg_t processor_t::legalize_privilege(reg_t prv) if (!supports_extension('U')) return PRV_M; - if (prv == PRV_H || !supports_extension('S')) + if ((prv == PRV_HS && !supports_extension('H')) || (prv == PRV_S && !supports_extension('S'))) return PRV_U; return prv; @@ -235,8 +590,52 @@ void processor_t::set_privilege(reg_t prv) state.prv = legalize_privilege(prv); } +void processor_t::set_virt(bool virt) +{ + reg_t tmp, mask; + + if (state.prv == PRV_M) + return; + + if (state.v != virt) { + /* + * Ideally, we should flush TLB here but we don't need it because + * set_virt() is always used in conjucter with set_privilege() and + * set_privilege() will flush TLB unconditionally. + */ + if (state.v and !virt) { + /* + * When transitioning from virt-on (VS/VU) to virt-off (HS/M) + * we should sync Guest/VM FS, VS, and XS state with Host FS, + * VS, and XS state. + */ + if ((state.mstatus & SSTATUS_FS) == SSTATUS_FS) { + state.vsstatus |= SSTATUS_FS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + if ((state.mstatus & SSTATUS_VS) == SSTATUS_VS) { + state.vsstatus |= SSTATUS_VS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + if ((state.mstatus & SSTATUS_XS) == SSTATUS_XS) { + state.vsstatus |= SSTATUS_XS; + state.vsstatus |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + } + } + mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ? 
SSTATUS64_SD : SSTATUS32_SD); + tmp = state.mstatus & mask; + state.mstatus = (state.mstatus & ~mask) | (state.vsstatus & mask); + state.vsstatus = tmp; + state.v = virt; + } +} + void processor_t::enter_debug_mode(uint8_t cause) { + state.debug_mode = true; state.dcsr.cause = cause; state.dcsr.prv = state.prv; set_privilege(PRV_M); @@ -247,14 +646,14 @@ void processor_t::enter_debug_mode(uint8_t cause) void processor_t::take_trap(trap_t& t, reg_t epc) { if (debug) { - fprintf(stderr, "core %3d: exception %s, epc 0x%016" PRIx64 "\n", + fprintf(log_file, "core %3d: exception %s, epc 0x%016" PRIx64 "\n", id, t.name(), epc); if (t.has_tval()) - fprintf(stderr, "core %3d: tval 0x%016" PRIx64 "\n", id, - t.get_tval()); + fprintf(log_file, "core %3d: tval 0x%016" PRIx64 "\n", + id, t.get_tval()); } - if (state.dcsr.cause) { + if (state.debug_mode) { if (t.cause() == CAUSE_BREAKPOINT) { state.pc = DEBUG_ROM_ENTRY; } else { @@ -271,36 +670,72 @@ void processor_t::take_trap(trap_t& t, reg_t epc) return; } - // by default, trap to M-mode, unless delegated to S-mode + // By default, trap to M-mode, unless delegated to HS-mode or VS-mode + reg_t vsdeleg, hsdeleg; reg_t bit = t.cause(); - reg_t deleg = state.medeleg; + bool curr_virt = state.v; bool interrupt = (bit & ((reg_t)1 << (max_xlen-1))) != 0; - if (interrupt) - deleg = state.mideleg, bit &= ~((reg_t)1 << (max_xlen-1)); - if (state.prv <= PRV_S && bit < max_xlen && ((deleg >> bit) & 1)) { - // handle the trap in S-mode - state.pc = state.stvec; + if (interrupt) { + vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.mideleg & state.hideleg) : 0; + hsdeleg = (state.prv <= PRV_S) ? state.mideleg : 0; + bit &= ~((reg_t)1 << (max_xlen-1)); + } else { + vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.medeleg & state.hedeleg) : 0; + hsdeleg = (state.prv <= PRV_S) ? 
state.medeleg : 0; + } + if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) { + // Handle the trap in VS-mode + reg_t vector = (state.vstvec & 1) && interrupt ? 4*bit : 0; + state.pc = (state.vstvec & ~(reg_t)1) + vector; + state.vscause = (interrupt) ? (t.cause() - 1) : t.cause(); + state.vsepc = epc; + state.vstval = t.get_tval(); + + reg_t s = state.mstatus; + s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); + s = set_field(s, MSTATUS_SPP, state.prv); + s = set_field(s, MSTATUS_SIE, 0); + set_csr(CSR_MSTATUS, s); + set_privilege(PRV_S); + } else if (state.prv <= PRV_S && bit < max_xlen && ((hsdeleg >> bit) & 1)) { + // Handle the trap in HS-mode + set_virt(false); + reg_t vector = (state.stvec & 1) && interrupt ? 4*bit : 0; + state.pc = (state.stvec & ~(reg_t)1) + vector; state.scause = t.cause(); state.sepc = epc; state.stval = t.get_tval(); + state.htval = t.get_tval2(); + state.htinst = t.get_tinst(); reg_t s = state.mstatus; s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); s = set_field(s, MSTATUS_SPP, state.prv); s = set_field(s, MSTATUS_SIE, 0); set_csr(CSR_MSTATUS, s); + s = state.hstatus; + s = set_field(s, HSTATUS_SPVP, state.prv); + s = set_field(s, HSTATUS_SPV, curr_virt); + s = set_field(s, HSTATUS_GVA, t.has_gva()); + set_csr(CSR_HSTATUS, s); set_privilege(PRV_S); } else { + // Handle the trap in M-mode + set_virt(false); reg_t vector = (state.mtvec & 1) && interrupt ? 
4*bit : 0; state.pc = (state.mtvec & ~(reg_t)1) + vector; state.mepc = epc; state.mcause = t.cause(); state.mtval = t.get_tval(); + state.mtval2 = t.get_tval2(); + state.mtinst = t.get_tinst(); reg_t s = state.mstatus; s = set_field(s, MSTATUS_MPIE, get_field(s, MSTATUS_MIE)); s = set_field(s, MSTATUS_MPP, state.prv); s = set_field(s, MSTATUS_MIE, 0); + s = set_field(s, MSTATUS_MPV, curr_virt); + s = set_field(s, MSTATUS_GVA, t.has_gva()); set_csr(CSR_MSTATUS, s); set_privilege(PRV_M); } @@ -310,11 +745,20 @@ void processor_t::disasm(insn_t insn) { uint64_t bits = insn.bits() & ((1ULL << (8 * insn_length(insn.bits()))) - 1); if (last_pc != state.pc || last_bits != bits) { + +#ifdef RISCV_ENABLE_COMMITLOG + const char* sym = get_symbol(state.pc); + if (sym != nullptr) + { + fprintf(log_file, "core %3d: >>>> %s\n", id, sym); + } +#endif + if (executions != 1) { - fprintf(stderr, "core %3d: Executed %" PRIx64 " times\n", id, executions); + fprintf(log_file, "core %3d: Executed %" PRIx64 " times\n", id, executions); } - fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", + fprintf(log_file, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n", id, state.pc, bits, disassembler->disassemble(insn).c_str()); last_pc = state.pc; last_bits = bits; @@ -332,26 +776,52 @@ int processor_t::paddr_bits() void processor_t::set_csr(int which, reg_t val) { +#if defined(RISCV_ENABLE_COMMITLOG) +#define LOG_CSR(rd) \ + STATE.log_reg_write[((which) << 4) | 4] = {get_csr(rd), 0}; +#else +#define LOG_CSR(rd) +#endif + val = zext_xlen(val); - reg_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP - | ((ext != NULL) << IRQ_COP); - reg_t all_ints = delegable_ints | MIP_MSIP | MIP_MTIP; + reg_t supervisor_ints = supports_extension('S') ? MIP_SSIP | MIP_STIP | MIP_SEIP : 0; + reg_t vssip_int = supports_extension('H') ? MIP_VSSIP : 0; + reg_t hypervisor_ints = supports_extension('H') ? 
MIP_HS_MASK : 0; + reg_t coprocessor_ints = (ext != NULL) << IRQ_COP; + reg_t delegable_ints = supervisor_ints | coprocessor_ints; + reg_t all_ints = delegable_ints | hypervisor_ints | MIP_MSIP | MIP_MTIP | MIP_MEIP; + + if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.max_pmp) { + // If no PMPs are configured, disallow access to all. Otherwise, allow + // access to all, but unimplemented ones are hardwired to zero. + if (n_pmp == 0) + return; - if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.n_pmp) { size_t i = which - CSR_PMPADDR0; bool locked = state.pmpcfg[i] & PMP_L; - bool next_locked = i+1 < state.n_pmp && (state.pmpcfg[i+1] & PMP_L); - bool next_tor = i+1 < state.n_pmp && (state.pmpcfg[i+1] & PMP_A) == PMP_TOR; - if (!locked && !(next_locked && next_tor)) - state.pmpaddr[i] = val; + bool next_locked = i+1 < state.max_pmp && (state.pmpcfg[i+1] & PMP_L); + bool next_tor = i+1 < state.max_pmp && (state.pmpcfg[i+1] & PMP_A) == PMP_TOR; + if (i < n_pmp && !locked && !(next_locked && next_tor)) { + state.pmpaddr[i] = val & ((reg_t(1) << (MAX_PADDR_BITS - PMP_SHIFT)) - 1); + LOG_CSR(which); + } mmu->flush_tlb(); } - if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.n_pmp / 4) { + if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.max_pmp / 4) { + if (n_pmp == 0) + return; + for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8; i++) { - if (!(state.pmpcfg[i] & PMP_L)) - state.pmpcfg[i] = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L); + if (i < n_pmp && !(state.pmpcfg[i] & PMP_L)) { + uint8_t cfg = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L); + cfg &= ~PMP_W | ((cfg & PMP_R) ? 
PMP_W : 0); // Disallow R=0 W=1 + if (lg_pmp_granularity != PMP_SHIFT && (cfg & PMP_A) == PMP_NA4) + cfg |= PMP_NAPOT; // Disallow A=NA4 when granularity > 4 + state.pmpcfg[i] = cfg; + LOG_CSR(which); + } } mmu->flush_tlb(); } @@ -371,16 +841,30 @@ void processor_t::set_csr(int which, reg_t val) state.fflags = (val & FSR_AEXC) >> FSR_AEXC_SHIFT; state.frm = (val & FSR_RD) >> FSR_RD_SHIFT; break; + case CSR_VCSR: + dirty_vs_state; + VU.vxsat = (val & VCSR_VXSAT) >> VCSR_VXSAT_SHIFT; + VU.vxrm = (val & VCSR_VXRM) >> VCSR_VXRM_SHIFT; + break; case CSR_MSTATUS: { if ((val ^ state.mstatus) & (MSTATUS_MPP | MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_MXR)) mmu->flush_tlb(); - reg_t mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE - | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM - | MSTATUS_MXR | MSTATUS_TW | MSTATUS_TVM - | MSTATUS_TSR | MSTATUS_UXL | MSTATUS_SXL | - (ext ? MSTATUS_XS : 0); + bool has_fs = supports_extension('S') || supports_extension('F') + || supports_extension('V'); + bool has_vs = supports_extension('V'); + bool has_mpv = supports_extension('S') && supports_extension('H'); + bool has_gva = has_mpv; + + reg_t mask = MSTATUS_MIE | MSTATUS_MPIE | MSTATUS_MPRV + | (supports_extension('S') ? (MSTATUS_SUM | MSTATUS_SIE | MSTATUS_SPIE) : 0) + | MSTATUS_MXR | MSTATUS_TW | MSTATUS_TVM | MSTATUS_TSR + | (has_fs ? MSTATUS_FS : 0) + | (has_vs ? MSTATUS_VS : 0) + | (ext ? MSTATUS_XS : 0) + | (has_gva ? MSTATUS_GVA : 0) + | (has_mpv ? 
MSTATUS_MPV : 0); reg_t requested_mpp = legalize_privilege(get_field(val, MSTATUS_MPP)); state.mstatus = set_field(state.mstatus, MSTATUS_MPP, requested_mpp); @@ -391,20 +875,22 @@ void processor_t::set_csr(int which, reg_t val) bool dirty = (state.mstatus & MSTATUS_FS) == MSTATUS_FS; dirty |= (state.mstatus & MSTATUS_XS) == MSTATUS_XS; + dirty |= (state.mstatus & MSTATUS_VS) == MSTATUS_VS; if (max_xlen == 32) state.mstatus = set_field(state.mstatus, MSTATUS32_SD, dirty); else state.mstatus = set_field(state.mstatus, MSTATUS64_SD, dirty); - state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); - state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); - state.mstatus = set_field(state.mstatus, MSTATUS_SXL, xlen_to_uxl(max_xlen)); + if (supports_extension('U')) + state.mstatus = set_field(state.mstatus, MSTATUS_UXL, xlen_to_uxl(max_xlen)); + if (supports_extension('S')) + state.mstatus = set_field(state.mstatus, MSTATUS_SXL, xlen_to_uxl(max_xlen)); // U-XLEN == S-XLEN == M-XLEN xlen = max_xlen; break; } case CSR_MIP: { - reg_t mask = MIP_SSIP | MIP_STIP; + reg_t mask = (supervisor_ints | hypervisor_ints) & (MIP_SSIP | MIP_STIP | vssip_int); state.mip = (state.mip & ~mask) | (val & mask); break; } @@ -419,9 +905,17 @@ void processor_t::set_csr(int which, reg_t val) (1 << CAUSE_MISALIGNED_FETCH) | (1 << CAUSE_BREAKPOINT) | (1 << CAUSE_USER_ECALL) | + (1 << CAUSE_SUPERVISOR_ECALL) | (1 << CAUSE_FETCH_PAGE_FAULT) | (1 << CAUSE_LOAD_PAGE_FAULT) | (1 << CAUSE_STORE_PAGE_FAULT); + mask |= supports_extension('H') ? 
+ (1 << CAUSE_VIRTUAL_SUPERVISOR_ECALL) | + (1 << CAUSE_FETCH_GUEST_PAGE_FAULT) | + (1 << CAUSE_LOAD_GUEST_PAGE_FAULT) | + (1 << CAUSE_VIRTUAL_INSTRUCTION) | + (1 << CAUSE_STORE_GUEST_PAGE_FAULT) + : 0; state.medeleg = (state.medeleg & ~mask) | (val & mask); break; } @@ -450,36 +944,85 @@ void processor_t::set_csr(int which, reg_t val) break; case CSR_SSTATUS: { reg_t mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_SPP | SSTATUS_FS - | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR; + | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR + | (supports_extension('V') ? SSTATUS_VS : 0); return set_csr(CSR_MSTATUS, (state.mstatus & ~mask) | (val & mask)); } case CSR_SIP: { - reg_t mask = MIP_SSIP & state.mideleg; - return set_csr(CSR_MIP, (state.mip & ~mask) | (val & mask)); + reg_t mask; + if (state.v) { + mask = state.hideleg & MIP_VSSIP; + val = val << 1; + } else { + mask = state.mideleg & MIP_SSIP; + } + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_SIE: { + reg_t mask; + if (state.v) { + mask = state.hideleg & MIP_VS_MASK; + val = val << 1; + } else { + mask = state.mideleg & ~MIP_HS_MASK; + } + state.mie = (state.mie & ~mask) | (val & mask); + break; } - case CSR_SIE: - return set_csr(CSR_MIE, - (state.mie & ~state.mideleg) | (val & state.mideleg)); case CSR_SATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; mmu->flush_tlb(); if (max_xlen == 32) - state.satp = val & (SATP32_PPN | SATP32_MODE); + reg_val = val & (SATP32_PPN | SATP32_MODE); if (max_xlen == 64 && (get_field(val, SATP64_MODE) == SATP_MODE_OFF || get_field(val, SATP64_MODE) == SATP_MODE_SV39 || get_field(val, SATP64_MODE) == SATP_MODE_SV48)) - state.satp = val & (SATP64_PPN | SATP64_MODE); + reg_val = val & (SATP64_PPN | SATP64_MODE | rv64_ppn_mask); + if (state.v) + state.vsatp = reg_val; + else + state.satp = reg_val; break; } - case CSR_SEPC: state.sepc = val & ~(reg_t)1; break; - case CSR_STVEC: state.stvec = val >> 2 << 2; break; - case 
CSR_SSCRATCH: state.sscratch = val; break; - case CSR_SCAUSE: state.scause = val; break; - case CSR_STVAL: state.stval = val; break; + case CSR_SEPC: + if (state.v) + state.vsepc = val & ~(reg_t)1; + else + state.sepc = val & ~(reg_t)1; + break; + case CSR_STVEC: + if (state.v) + state.vstvec = val & ~(reg_t)2; + else + state.stvec = val & ~(reg_t)2; + break; + case CSR_SSCRATCH: + if (state.v) + state.vsscratch = val; + else + state.sscratch = val; + break; + case CSR_SCAUSE: + if (state.v) + state.vscause = val; + else + state.scause = val; + break; + case CSR_STVAL: + if (state.v) + state.vstval = val; + else + state.stval = val; + break; case CSR_MEPC: state.mepc = val & ~(reg_t)1; break; case CSR_MTVEC: state.mtvec = val & ~(reg_t)2; break; case CSR_MSCRATCH: state.mscratch = val; break; case CSR_MCAUSE: state.mcause = val; break; case CSR_MTVAL: state.mtval = val; break; + case CSR_MTVAL2: state.mtval2 = val; break; + case CSR_MTINST: state.mtinst = val; break; case CSR_MISA: { // the write is ignored if increasing IALIGN would misalign the PC if (!(val & (1L << ('C' - 'A'))) && (state.pc & 2)) @@ -495,9 +1038,118 @@ void processor_t::set_csr(int which, reg_t val) mask |= 1L << ('F' - 'A'); mask |= 1L << ('D' - 'A'); mask |= 1L << ('C' - 'A'); + mask |= 1L << ('H' - 'A'); mask &= max_isa; state.misa = (val & mask) | (state.misa & ~mask); + + // update the forced bits in MIDELEG + if (supports_extension('H')) + state.mideleg |= MIDELEG_FORCED_MASK; + else + state.mideleg &= ~MIDELEG_FORCED_MASK; + break; + } + case CSR_HSTATUS: { + reg_t mask = HSTATUS_VTSR | HSTATUS_VTW | HSTATUS_VTVM | + HSTATUS_HU | HSTATUS_SPVP | HSTATUS_SPV | HSTATUS_GVA; + state.hstatus = (state.hstatus & ~mask) | (val & mask); + break; + } + case CSR_HEDELEG: { + reg_t mask = + (1 << CAUSE_MISALIGNED_FETCH) | + (1 << CAUSE_BREAKPOINT) | + (1 << CAUSE_MISALIGNED_LOAD) | + (1 << CAUSE_LOAD_ACCESS) | + (1 << CAUSE_MISALIGNED_STORE) | + (1 << CAUSE_STORE_ACCESS) | + (1 << CAUSE_USER_ECALL) 
| + (1 << CAUSE_FETCH_PAGE_FAULT) | + (1 << CAUSE_LOAD_PAGE_FAULT) | + (1 << CAUSE_STORE_PAGE_FAULT); + state.hedeleg = (state.hedeleg & ~mask) | (val & mask); + break; + } + case CSR_HIDELEG: { + reg_t mask = MIP_VS_MASK; + state.hideleg = (state.hideleg & ~mask) | (val & mask); + break; + } + case CSR_HIE: { + reg_t mask = MIP_HS_MASK; + state.mie = (state.mie & ~mask) | (val & mask); + break; + } + case CSR_HCOUNTEREN: + state.hcounteren = val; + break; + case CSR_HGEIE: + /* Ignore */ + break; + case CSR_HTVAL: + state.htinst = val; + break; + case CSR_HIP: { + reg_t mask = MIP_VSSIP; + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_HVIP: { + reg_t mask = MIP_VS_MASK; + state.mip = (state.mip & ~mask) | (val & mask); + break; + } + case CSR_HTINST: + state.htinst = val; + break; + case CSR_HGATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; + mmu->flush_tlb(); + if (max_xlen == 32) + reg_val = val & (HGATP32_PPN | HGATP32_MODE); + if (max_xlen == 64 && (get_field(val, HGATP64_MODE) == HGATP_MODE_OFF || + get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4 || + get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4)) + reg_val = val & (HGATP64_PPN | HGATP64_MODE | rv64_ppn_mask); + state.hgatp = reg_val; + break; + } + case CSR_VSSTATUS: { + reg_t mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ? 
SSTATUS64_SD : SSTATUS32_SD); + state.vsstatus = (state.vsstatus & ~mask) | (val & mask); + break; + } + case CSR_VSIE: { + reg_t mask = state.hideleg & MIP_VS_MASK; + state.mie = (state.mie & ~mask) | ((val << 1) & mask); + break; + } + case CSR_VSTVEC: state.vstvec = val & ~(reg_t)2; break; + case CSR_VSSCRATCH: state.vsscratch = val; break; + case CSR_VSEPC: state.vsepc = val & ~(reg_t)1; break; + case CSR_VSCAUSE: state.vscause = val; break; + case CSR_VSTVAL: state.vstval = val; break; + case CSR_VSIP: { + reg_t mask = state.hideleg & MIP_VSSIP; + state.mip = (state.mip & ~mask) | ((val << 1) & mask); + break; + } + case CSR_VSATP: { + reg_t reg_val = 0; + reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; + mmu->flush_tlb(); + if (max_xlen == 32) + reg_val = val & (SATP32_PPN | SATP32_MODE); + if (max_xlen == 64 && (get_field(val, SATP64_MODE) == SATP_MODE_OFF || + get_field(val, SATP64_MODE) == SATP_MODE_SV39 || + get_field(val, SATP64_MODE) == SATP_MODE_SV48)) + reg_val = val & (SATP64_PPN | SATP64_MODE | rv64_ppn_mask); + state.vsatp = reg_val; break; } case CSR_TSELECT: @@ -508,7 +1160,7 @@ void processor_t::set_csr(int which, reg_t val) case CSR_TDATA1: { mcontrol_t *mc = &state.mcontrol[state.tselect]; - if (mc->dmode && !state.dcsr.cause) { + if (mc->dmode && !state.debug_mode) { break; } mc->dmode = get_field(val, MCONTROL_DMODE(xlen)); @@ -531,7 +1183,7 @@ void processor_t::set_csr(int which, reg_t val) } break; case CSR_TDATA2: - if (state.mcontrol[state.tselect].dmode && !state.dcsr.cause) { + if (state.mcontrol[state.tselect].dmode && !state.debug_mode) { break; } if (state.tselect < state.num_triggers) { @@ -551,47 +1203,180 @@ void processor_t::set_csr(int which, reg_t val) case CSR_DPC: state.dpc = val & ~(reg_t)1; break; - case CSR_DSCRATCH: - state.dscratch = val; + case CSR_DSCRATCH0: + state.dscratch0 = val; + break; + case CSR_DSCRATCH1: + state.dscratch1 = val; + break; + case CSR_VSTART: + dirty_vs_state; + VU.vstart = 
val & (VU.get_vlen() - 1); + break; + case CSR_VXSAT: + dirty_vs_state; + VU.vxsat = val & 0x1ul; + break; + case CSR_VXRM: + dirty_vs_state; + VU.vxrm = val & 0x3ul; + break; + // xpulphwloop + case CSR_LPSTART0: + hwLoops.set_start(0, val); + break; + case CSR_LPEND0: + hwLoops.set_end(0, val); + break; + case CSR_LPCOUNT0: + hwLoops.set_count(0, val); + break; + case CSR_LPSTART1: + hwLoops.set_start(1, val); + break; + case CSR_LPEND1: + hwLoops.set_end(1, val); + break; + case CSR_LPCOUNT1: + hwLoops.set_count(1, val); + break; + } + +#if defined(RISCV_ENABLE_COMMITLOG) + switch (which) + { + case CSR_FFLAGS: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FFLAGS); + break; + case CSR_FRM: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FRM); + break; + case CSR_FCSR: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_FFLAGS); + LOG_CSR(CSR_FRM); + break; + case CSR_VCSR: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXSAT); + LOG_CSR(CSR_VXRM); + break; + + case CSR_VSTART: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VSTART); + break; + case CSR_VXSAT: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXSAT); + break; + case CSR_VXRM: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_VXRM); + break; + + case CSR_SSTATUS: + LOG_CSR(CSR_MSTATUS); + LOG_CSR(CSR_SSTATUS); + break; + case CSR_SIP: + LOG_CSR(CSR_MIP); + LOG_CSR(CSR_SIP); + break; + case CSR_SIE: + LOG_CSR(CSR_MIE); + LOG_CSR(CSR_SIE); + break; + + case CSR_MSTATUS: + case CSR_MIP: + case CSR_MIE: + case CSR_MIDELEG: + case CSR_MEDELEG: + case CSR_MINSTRET: + case CSR_MCYCLE: + case CSR_MINSTRETH: + case CSR_MCYCLEH: + case CSR_SCOUNTEREN: + case CSR_MCOUNTEREN: + case CSR_SATP: + case CSR_SEPC: + case CSR_STVEC: + case CSR_SSCRATCH: + case CSR_SCAUSE: + case CSR_STVAL: + case CSR_MEPC: + case CSR_MTVEC: + case CSR_MSCRATCH: + case CSR_MCAUSE: + case CSR_MTVAL: + case CSR_MISA: + case CSR_TSELECT: + case CSR_TDATA1: + case CSR_TDATA2: + case CSR_DCSR: + case CSR_DPC: + case CSR_DSCRATCH0: + case CSR_DSCRATCH1: + LOG_CSR(which); break; } +#endif } // Note that get_csr 
is sometimes called when read side-effects should not // be actioned. In other words, Spike cannot currently support CSRs with // side effects on reads. -reg_t processor_t::get_csr(int which) +reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek) { uint32_t ctr_en = -1; if (state.prv < PRV_M) ctr_en &= state.mcounteren; + if (state.v) + ctr_en &= state.hcounteren; if (state.prv < PRV_S) ctr_en &= state.scounteren; bool ctr_ok = (ctr_en >> (which & 31)) & 1; + reg_t res = 0; +#define ret(n) do { \ + res = (n); \ + goto out; \ + } while (false) + if (ctr_ok) { if (which >= CSR_HPMCOUNTER3 && which <= CSR_HPMCOUNTER31) - return 0; + ret(0); if (xlen == 32 && which >= CSR_HPMCOUNTER3H && which <= CSR_HPMCOUNTER31H) - return 0; + ret(0); } if (which >= CSR_MHPMCOUNTER3 && which <= CSR_MHPMCOUNTER31) - return 0; + ret(0); if (xlen == 32 && which >= CSR_MHPMCOUNTER3H && which <= CSR_MHPMCOUNTER31H) - return 0; + ret(0); if (which >= CSR_MHPMEVENT3 && which <= CSR_MHPMEVENT31) - return 0; - - if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.n_pmp) - return state.pmpaddr[which - CSR_PMPADDR0]; + ret(0); + + if (which >= CSR_PMPADDR0 && which < CSR_PMPADDR0 + state.max_pmp) { + // If n_pmp is zero, that means pmp is not implemented hence raise trap if it tries to access the csr + if (n_pmp == 0) + goto throw_illegal; + reg_t i = which - CSR_PMPADDR0; + if ((state.pmpcfg[i] & PMP_A) >= PMP_NAPOT) + ret(state.pmpaddr[i] | (~pmp_tor_mask() >> 1)); + else + ret(state.pmpaddr[i] & pmp_tor_mask()); + } - if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.n_pmp / 4) { + if (which >= CSR_PMPCFG0 && which < CSR_PMPCFG0 + state.max_pmp / 4) { require((which & ((xlen / 32) - 1)) == 0); - reg_t res = 0; - for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8 && i < state.n_pmp; i++) - res |= reg_t(state.pmpcfg[i]) << (8 * (i - i0)); - return res; + reg_t cfg_res = 0; + for (size_t i0 = (which - CSR_PMPCFG0) * 4, i = i0; i < i0 + xlen / 8 
&& i < state.max_pmp; i++) + cfg_res |= reg_t(state.pmpcfg[i]) << (8 * (i - i0)); + ret(cfg_res); } switch (which) @@ -600,76 +1385,191 @@ reg_t processor_t::get_csr(int which) require_fp; if (!supports_extension('F')) break; - return state.fflags; + ret(state.fflags); case CSR_FRM: require_fp; if (!supports_extension('F')) break; - return state.frm; + ret(state.frm); case CSR_FCSR: require_fp; if (!supports_extension('F')) break; - return (state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT); + ret((state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT)); + case CSR_VCSR: + require_vector_vs; + if (!supports_extension('V')) + break; + ret((VU.vxsat << VCSR_VXSAT_SHIFT) | (VU.vxrm << VCSR_VXRM_SHIFT)); case CSR_INSTRET: case CSR_CYCLE: if (ctr_ok) - return state.minstret; + ret(state.minstret); + if (state.v && + ((state.mcounteren >> (which & 31)) & 1) && + !((state.hcounteren >> (which & 31)) & 1)) { + goto throw_virtual; + } break; case CSR_MINSTRET: case CSR_MCYCLE: - return state.minstret; + ret(state.minstret); case CSR_INSTRETH: case CSR_CYCLEH: if (ctr_ok && xlen == 32) - return state.minstret >> 32; + ret(state.minstret >> 32); + if (state.v && + ((state.mcounteren >> (which & 31)) & 1) && + !((state.hcounteren >> (which & 31)) & 1)) { + goto throw_virtual; + } break; case CSR_MINSTRETH: case CSR_MCYCLEH: if (xlen == 32) - return state.minstret >> 32; + ret(state.minstret >> 32); break; - case CSR_SCOUNTEREN: return state.scounteren; - case CSR_MCOUNTEREN: return state.mcounteren; + case CSR_SCOUNTEREN: ret(state.scounteren); + case CSR_MCOUNTEREN: + if (!supports_extension('U')) + break; + ret(state.mcounteren); + case CSR_MCOUNTINHIBIT: ret(0); case CSR_SSTATUS: { reg_t mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_SPP | SSTATUS_FS + | (supports_extension('V') ? 
SSTATUS_VS : 0) | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR | SSTATUS_UXL; reg_t sstatus = state.mstatus & mask; if ((sstatus & SSTATUS_FS) == SSTATUS_FS || (sstatus & SSTATUS_XS) == SSTATUS_XS) sstatus |= (xlen == 32 ? SSTATUS32_SD : SSTATUS64_SD); - return sstatus; + ret(sstatus); } - case CSR_SIP: return state.mip & state.mideleg; - case CSR_SIE: return state.mie & state.mideleg; - case CSR_SEPC: return state.sepc & pc_alignment_mask(); - case CSR_STVAL: return state.stval; - case CSR_STVEC: return state.stvec; - case CSR_SCAUSE: - if (max_xlen > xlen) - return state.scause | ((state.scause >> (max_xlen-1)) << (xlen-1)); - return state.scause; - case CSR_SATP: - if (get_field(state.mstatus, MSTATUS_TVM)) + case CSR_SIP: { + if (state.v) { + ret((state.mip & state.hideleg & MIP_VS_MASK) >> 1); + } else { + ret(state.mip & state.mideleg & ~MIP_HS_MASK); + } + } + case CSR_SIE: { + if (state.v) { + ret((state.mie & state.hideleg & MIP_VS_MASK) >> 1); + } else { + ret(state.mie & state.mideleg & ~MIP_HS_MASK); + } + } + case CSR_SEPC: { + if (state.v) { + ret(state.vsepc & pc_alignment_mask()); + } else { + ret(state.sepc & pc_alignment_mask()); + } + } + case CSR_STVAL: { + if (state.v) { + ret(state.vstval); + } else { + ret(state.stval); + } + } + case CSR_STVEC: { + if (state.v) { + ret(state.vstvec); + } else { + ret(state.stvec); + } + } + case CSR_SCAUSE: { + if (state.v) { + if (max_xlen > xlen) + ret(state.vscause | ((state.vscause >> (max_xlen-1)) << (xlen-1))); + ret(state.vscause); + } else { + if (max_xlen > xlen) + ret(state.scause | ((state.scause >> (max_xlen-1)) << (xlen-1))); + ret(state.scause); + } + } + case CSR_SATP: { + if (state.v) { + if (get_field(state.hstatus, HSTATUS_VTVM)) + goto throw_virtual; + ret(state.vsatp); + } else { + if (get_field(state.mstatus, MSTATUS_TVM)) + require_privilege(PRV_M); + ret(state.satp); + } + } + case CSR_SSCRATCH: { + if (state.v) { + ret(state.vsscratch); + } else { + ret(state.sscratch); + } + } + case 
CSR_MSTATUS: ret(state.mstatus); + case CSR_MIP: ret(state.mip); + case CSR_MIE: ret(state.mie); + case CSR_MEPC: ret(state.mepc & pc_alignment_mask()); + case CSR_MSCRATCH: ret(state.mscratch); + case CSR_MCAUSE: ret(state.mcause); + case CSR_MTVAL: ret(state.mtval); + case CSR_MTVAL2: + if (supports_extension('H')) + ret(state.mtval2); + break; + case CSR_MTINST: + if (supports_extension('H')) + ret(state.mtinst); + break; + case CSR_MISA: ret(state.misa); + case CSR_MARCHID: ret(5); + case CSR_MIMPID: ret(0); + case CSR_MVENDORID: ret(0); + case CSR_MHARTID: ret(id); + case CSR_MTVEC: ret(state.mtvec); + case CSR_MEDELEG: + if (!supports_extension('S')) + break; + ret(state.medeleg); + case CSR_MIDELEG: + if (!supports_extension('S')) + break; + ret(state.mideleg); + case CSR_HSTATUS: ret(state.hstatus); + case CSR_HEDELEG: ret(state.hedeleg); + case CSR_HIDELEG: ret(state.hideleg); + case CSR_HIE: ret(state.mie & MIP_HS_MASK); + case CSR_HCOUNTEREN: ret(state.hcounteren); + case CSR_HGEIE: ret(0); + case CSR_HTVAL: ret(state.htval); + case CSR_HIP: ret(state.mip & MIP_HS_MASK); + case CSR_HVIP: ret(state.mip & MIP_VS_MASK); + case CSR_HTINST: ret(state.htinst); + case CSR_HGATP: { + if (!state.v && get_field(state.mstatus, MSTATUS_TVM)) require_privilege(PRV_M); - return state.satp; - case CSR_SSCRATCH: return state.sscratch; - case CSR_MSTATUS: return state.mstatus; - case CSR_MIP: return state.mip; - case CSR_MIE: return state.mie; - case CSR_MEPC: return state.mepc & pc_alignment_mask(); - case CSR_MSCRATCH: return state.mscratch; - case CSR_MCAUSE: return state.mcause; - case CSR_MTVAL: return state.mtval; - case CSR_MISA: return state.misa; - case CSR_MARCHID: return 5; - case CSR_MIMPID: return 0; - case CSR_MVENDORID: return 0; - case CSR_MHARTID: return id; - case CSR_MTVEC: return state.mtvec; - case CSR_MEDELEG: return state.medeleg; - case CSR_MIDELEG: return state.mideleg; - case CSR_TSELECT: return state.tselect; + ret(state.hgatp); + } + case 
CSR_HGEIP: ret(0); + case CSR_VSSTATUS: { + reg_t mask = SSTATUS_VS_MASK; + mask |= (supports_extension('F') ? SSTATUS_FS : 0); + mask |= (supports_extension('V') ? SSTATUS_VS : 0); + mask |= (xlen == 64 ? SSTATUS64_SD : SSTATUS32_SD); + ret(state.vsstatus & mask); + } + case CSR_VSIE: ret((state.mie & state.hideleg & MIP_VS_MASK) >> 1); + case CSR_VSTVEC: ret(state.vstvec); + case CSR_VSSCRATCH: ret(state.vsscratch); + case CSR_VSEPC: ret(state.vsepc & pc_alignment_mask()); + case CSR_VSCAUSE: ret(state.vscause); + case CSR_VSTVAL: ret(state.vstval); + case CSR_VSIP: ret((state.mip & state.hideleg & MIP_VS_MASK) >> 1); + case CSR_VSATP: ret(state.vsatp); + case CSR_TSELECT: ret(state.tselect); case CSR_TDATA1: if (state.tselect < state.num_triggers) { reg_t v = 0; @@ -689,21 +1589,23 @@ reg_t processor_t::get_csr(int which) v = set_field(v, MCONTROL_EXECUTE, mc->execute); v = set_field(v, MCONTROL_STORE, mc->store); v = set_field(v, MCONTROL_LOAD, mc->load); - return v; + ret(v); } else { - return 0; + ret(0); } break; case CSR_TDATA2: if (state.tselect < state.num_triggers) { - return state.tdata2[state.tselect]; + ret(state.tdata2[state.tselect]); } else { - return 0; + ret(0); } break; - case CSR_TDATA3: return 0; + case CSR_TDATA3: ret(0); case CSR_DCSR: { + if (!state.debug_mode) + break; uint32_t v = 0; v = set_field(v, DCSR_XDEBUGVER, 1); v = set_field(v, DCSR_EBREAKM, state.dcsr.ebreakm); @@ -715,19 +1617,109 @@ reg_t processor_t::get_csr(int which) v = set_field(v, DCSR_CAUSE, state.dcsr.cause); v = set_field(v, DCSR_STEP, state.dcsr.step); v = set_field(v, DCSR_PRV, state.dcsr.prv); - return v; + ret(v); } case CSR_DPC: - return state.dpc & pc_alignment_mask(); - case CSR_DSCRATCH: - return state.dscratch; + if (!state.debug_mode) + break; + ret(state.dpc & pc_alignment_mask()); + case CSR_DSCRATCH0: + if (!state.debug_mode) + break; + ret(state.dscratch0); + case CSR_DSCRATCH1: + if (!state.debug_mode) + break; + ret(state.dscratch1); + case CSR_VSTART: 
+ require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vstart); + case CSR_VXSAT: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vxsat); + case CSR_VXRM: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vxrm); + case CSR_VL: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vl); + case CSR_VTYPE: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vtype); + case CSR_VLENB: + require_vector_vs; + if (!supports_extension('V')) + break; + ret(VU.vlenb); + // xpulphwloop + case CSR_LPSTART0: + ret(state.lpstart0); + break; + case CSR_LPEND0: + ret(state.lpend0); + break; + case CSR_LPCOUNT0: + ret(state.lpcount0); + break; + case CSR_LPSTART1: + ret(state.lpstart1); + break; + case CSR_LPEND1: + ret(state.lpend1); + break; + case CSR_LPCOUNT1: + ret(state.lpcount1); + break; + } + +#undef ret + + // If we get here, the CSR doesn't exist. Unimplemented CSRs always throw + // illegal-instruction exceptions, not virtual-instruction exceptions. +throw_illegal: + throw trap_illegal_instruction(insn.bits()); + +throw_virtual: + throw trap_virtual_instruction(insn.bits()); + +out: + // Check permissions. Raise virtual-instruction exception if V=1, + // privileges are insufficient, and the CSR belongs to supervisor or + // hypervisor. Raise illegal-instruction exception otherwise. + + if (peek) + return res; + + unsigned csr_priv = get_field(which, 0x300); + bool csr_read_only = get_field(which, 0xC00) == 3; + unsigned priv = state.prv == PRV_S && !state.v ? 
PRV_HS : state.prv; + + if ((csr_priv == PRV_S && !supports_extension('S')) || + (csr_priv == PRV_HS && !supports_extension('H'))) + goto throw_illegal; + + if ((write && csr_read_only) || priv < csr_priv) { + if (state.v && csr_priv <= PRV_HS) + goto throw_virtual; + goto throw_illegal; } - throw trap_illegal_instruction(0); + + return res; } reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc) { - throw trap_illegal_instruction(0); + throw trap_illegal_instruction(insn.bits()); } insn_func_t processor_t::decode_insn(insn_t insn) @@ -784,8 +1776,11 @@ void processor_t::register_extension(extension_t* x) for (auto insn : x->get_instructions()) register_insn(insn); build_opcode_map(); - for (auto disasm_insn : x->get_disasms()) - disassembler->add_insn(disasm_insn); + + if (disassembler) + for (auto disasm_insn : x->get_disasms()) + disassembler->add_insn(disasm_insn); + if (ext != NULL) throw std::logic_error("only one extension may be registered"); ext = x; @@ -858,3 +1853,92 @@ void processor_t::trigger_updated() } } } + + +// PULP HW-Loop extension (xpulphwloop) + +// sets activate flag for loops and overall unit +void processor_t::hwLoopUnit_t::set_active(int i) +{ + bool valid_body = get_start(i) < get_end(i); + lp_active[i] = valid_body && (get_count(i) > 0); + any_active |= lp_active[i]; + + // Constraint: loop body must be at least 3 instructions + if(lp_active[i] && (get_start(i) +8 > get_end(i))) { + throw trap_illegal_instruction(0); + } +} + +// CSR write methods with the side-effects (set activate, check constraints) +void processor_t::hwLoopUnit_t::set_start(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpstart1 = val; + } else { + p->state.lpstart0 = val; + } + set_active(loopNr); +} + +void processor_t::hwLoopUnit_t::set_end(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpend1 = val; + } else { + p->state.lpend0 = val; + } + // Constraint: outer-end must be at least 2 instructions after inner-end + if(p->state.lpend0 +8 > 
p->state.lpend1) { + throw trap_illegal_instruction(0); + } + set_active(loopNr); +} + +void processor_t::hwLoopUnit_t::set_count(int loopNr, reg_t val) { + if(loopNr) { + p->state.lpcount1 = val; + } else { + p->state.lpcount0 = val; + } + set_active(loopNr); +} + +// Executed after execution of instruction +// pc: current pc (state.pc) +// npc: next pc (from instruction) +// returns next pc (can be modified due to hw-loop) +reg_t processor_t::hwLoopUnit_t::handle_loops(reg_t pc, reg_t npc, insn_t insn) +{ + // immediately break if inactive as to not degrade performance + if(!any_active) { + return npc; + } + + for(int i=0; i<2; i++) { + // active and in body + if(lp_active[i] && get_start(i) <= pc && pc <= get_end(i)) { + // Constraints: if not met -> throw trap_illegal_instruction(insn.bits()) + // no compressed instructions + if(insn.length() < 4) { + throw trap_illegal_instruction(insn.bits()); + } + // Todo: Finish constraint checks + // no unconditional jumps + // no conditional branching + // no privileged instructions except ebreak + // no memory ordering (fence) instr + + if(pc == get_end(i)) { + reg_t remaining = get_count(i) -1; + set_count(i, remaining); + lp_active[i] &= (remaining > 0); + any_active = lp_active[0] || lp_active[1]; + + if(lp_active[i]) { + return get_start(i); + } + } + } + } + + return npc; +} diff --git a/riscv/processor.h b/riscv/processor.h index de0be7829b..70f54bed71 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -8,7 +8,9 @@ #include "trap.h" #include #include +#include #include +#include #include "debug_rom_defines.h" class processor_t; @@ -27,11 +29,11 @@ struct insn_desc_t insn_func_t rv64; }; -struct commit_log_reg_t -{ - reg_t addr; - freg_t data; -}; +// regnum, data +typedef std::unordered_map commit_log_reg_t; + +// addr, value, size +typedef std::vector> commit_log_mem_t; typedef struct { @@ -83,6 +85,68 @@ typedef struct bool load; } mcontrol_t; +enum VRM{ + RNU = 0, + RNE, + RDN, + ROD, + INVALID_RM +}; +
+template +struct type_usew_t; + +template<> +struct type_usew_t<8> +{ + using type=uint8_t; +}; + +template<> +struct type_usew_t<16> +{ + using type=uint16_t; +}; + +template<> +struct type_usew_t<32> +{ + using type=uint32_t; +}; + +template<> +struct type_usew_t<64> +{ + using type=uint64_t; +}; + +template +struct type_sew_t; + +template<> +struct type_sew_t<8> +{ + using type=int8_t; +}; + +template<> +struct type_sew_t<16> +{ + using type=int16_t; +}; + +template<> +struct type_sew_t<32> +{ + using type=int32_t; +}; + +template<> +struct type_sew_t<64> +{ + using type=int64_t; +}; + // architectural state of a RISC-V hart struct state_t { @@ -96,6 +160,7 @@ struct state_t // control and status registers reg_t prv; // TODO: Can this be an enum instead? + bool v; reg_t misa; reg_t mstatus; reg_t mepc; @@ -116,19 +181,47 @@ struct state_t reg_t stvec; reg_t satp; reg_t scause; + + reg_t mtval2; + reg_t mtinst; + reg_t hstatus; + reg_t hideleg; + reg_t hedeleg; + uint32_t hcounteren; + reg_t htval; + reg_t htinst; + reg_t hgatp; + reg_t vsstatus; + reg_t vstvec; + reg_t vsscratch; + reg_t vsepc; + reg_t vscause; + reg_t vstval; + reg_t vsatp; + reg_t dpc; - reg_t dscratch; + reg_t dscratch0, dscratch1; dcsr_t dcsr; reg_t tselect; mcontrol_t mcontrol[num_triggers]; reg_t tdata2[num_triggers]; + bool debug_mode; - static const int n_pmp = 16; - uint8_t pmpcfg[n_pmp]; - reg_t pmpaddr[n_pmp]; + static const int max_pmp = 16; + uint8_t pmpcfg[max_pmp]; + reg_t pmpaddr[max_pmp]; uint32_t fflags; uint32_t frm; + + // xpulphwloop + reg_t lpstart0; + reg_t lpend0; + reg_t lpcount0; + reg_t lpstart1; + reg_t lpend1; + reg_t lpcount1; + bool serialized; // whether timer CSRs are in a well-defined state // When true, execute a single instruction and then enter debug mode. 
This @@ -141,6 +234,8 @@ struct state_t #ifdef RISCV_ENABLE_COMMITLOG commit_log_reg_t log_reg_write; + commit_log_mem_t log_mem_read; + commit_log_mem_t log_mem_write; reg_t last_inst_priv; int last_inst_xlen; int last_inst_flen; @@ -153,6 +248,12 @@ typedef enum { OPERATION_LOAD, } trigger_operation_t; +typedef enum { + // 65('A') ~ 90('Z') is reserved for standard isa in misa + EXT_ZFH = 0, + EXT_ZVEDIV, +} isa_extension_t; + // Count number of contiguous 1 bits starting from the LSB. static int cto(reg_t val) { @@ -166,15 +267,22 @@ static int cto(reg_t val) class processor_t : public abstract_device_t { public: - processor_t(const char* isa, simif_t* sim, uint32_t id, bool halt_on_reset=false); + processor_t(const char* isa, const char* priv, const char* varch, + simif_t* sim, uint32_t id, bool halt_on_reset, + FILE *log_file); ~processor_t(); void set_debug(bool value); void set_histogram(bool value); +#ifdef RISCV_ENABLE_COMMITLOG + void enable_log_commits(); + bool get_log_commits_enabled() const { return log_commits_enabled; } +#endif void reset(); void step(size_t n); // run for n cycles void set_csr(int which, reg_t val); - reg_t get_csr(int which); + reg_t get_csr(int which, insn_t insn, bool write, bool peek = 0); + reg_t get_csr(int which) { return get_csr(which, insn_t(0), false, true); } mmu_t* get_mmu() { return mmu; } state_t* get_state() { return &state; } unsigned get_xlen() { return xlen; } @@ -187,21 +295,26 @@ class processor_t : public abstract_device_t } extension_t* get_extension() { return ext; } bool supports_extension(unsigned char ext) { - if (ext >= 'a' && ext <= 'z') ext += 'A' - 'a'; - return ext >= 'A' && ext <= 'Z' && ((state.misa >> (ext - 'A')) & 1); + if (ext >= 'A' && ext <= 'Z') + return ((state.misa >> (ext - 'A')) & 1); + else + return extension_table[ext]; } reg_t pc_alignment_mask() { return ~(reg_t)(supports_extension('C') ? 
0 : 2); } void check_pc_alignment(reg_t pc) { if (unlikely(pc & ~pc_alignment_mask())) - throw trap_instruction_address_misaligned(pc); + throw trap_instruction_address_misaligned(pc, 0, 0); } reg_t legalize_privilege(reg_t); void set_privilege(reg_t); + void set_virt(bool); void update_histogram(reg_t pc); const disassembler_t* get_disassembler() { return disassembler; } + FILE *get_log_file() { return log_file; } + void register_insn(insn_desc_t); void register_extension(extension_t*); @@ -213,13 +326,17 @@ class processor_t : public abstract_device_t bool debug; // When true, take the slow simulation path. bool slow_path(); - bool halted() { return state.dcsr.cause ? true : false; } - bool halt_request; + bool halted() { return state.debug_mode; } + enum { + HR_NONE, /* Halt request is inactive. */ + HR_REGULAR, /* Regular halt request/debug interrupt. */ + HR_GROUP /* Halt requested due to halt group. */ + } halt_request; // Return the index of a trigger that matched, or -1. inline int trigger_match(trigger_operation_t operation, reg_t address, reg_t data) { - if (state.dcsr.cause) + if (state.debug_mode) return -1; bool chain_ok = true; @@ -259,7 +376,7 @@ class processor_t : public abstract_device_t break; case MATCH_NAPOT: { - reg_t mask = ~((1 << cto(state.tdata2[i])) - 1); + reg_t mask = ~((1 << (cto(state.tdata2[i])+1)) - 1); if ((value & mask) != (state.tdata2[i] & mask)) continue; } @@ -298,6 +415,11 @@ class processor_t : public abstract_device_t void trigger_updated(); + void set_pmp_num(reg_t pmp_num); + void set_pmp_granularity(reg_t pmp_granularity); + + const char* get_symbol(uint64_t addr); + private: simif_t* sim; mmu_t* mmu; // main memory is always accessed via the mmu @@ -310,7 +432,11 @@ class processor_t : public abstract_device_t reg_t max_isa; std::string isa_string; bool histogram_enabled; + bool log_commits_enabled; + FILE *log_file; bool halt_on_reset; + std::vector extension_table; + std::vector instructions; std::map pc_histogram; @@ 
-324,19 +450,125 @@ class processor_t : public abstract_device_t void disasm(insn_t insn); // disassemble and print an instruction int paddr_bits(); + reg_t pmp_tor_mask() { return -(reg_t(1) << (lg_pmp_granularity - PMP_SHIFT)); } + void enter_debug_mode(uint8_t cause); friend class mmu_t; friend class clint_t; friend class extension_t; - void parse_isa_string(const char* isa); + void parse_varch_string(const char*); + void parse_priv_string(const char*); + void parse_isa_string(const char*); void build_opcode_map(); void register_base_instructions(); insn_func_t decode_insn(insn_t insn); // Track repeated executions for processor_t::disasm() uint64_t last_pc, last_bits, executions; + reg_t n_pmp; + reg_t lg_pmp_granularity; + +public: + class vectorUnit_t { + public: + processor_t* p; + void *reg_file; + char reg_referenced[NVPR]; + int setvl_count; + reg_t vlmax; + reg_t vstart, vxrm, vxsat, vl, vtype, vlenb; + reg_t vma, vta; + reg_t vediv, vsew; + float vflmul; + reg_t ELEN, VLEN; + bool vill; + bool vstart_alu; + + // vector element for various SEW + template + T& elt(reg_t vReg, reg_t n, bool is_write = false){ + assert(vsew != 0); + assert((VLEN >> 3)/sizeof(T) > 0); + reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T)); + vReg += n / elts_per_reg; + n = n % elts_per_reg; +#ifdef WORDS_BIGENDIAN + // "V" spec 0.7.1 requires lower indices to map to lower significant + // bits when changing SEW, thus we need to index from the end on BE.
+ n ^= elts_per_reg - 1; +#endif + reg_referenced[vReg] = 1; + +#ifdef RISCV_ENABLE_COMMITLOG + if (is_write) + p->get_state()->log_reg_write[((vReg) << 4) | 2] = {0, 0}; +#endif + + T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3)); + return regStart[n]; + } + public: + + void reset(); + + vectorUnit_t(){ + reg_file = 0; + } + + ~vectorUnit_t(){ + free(reg_file); + reg_file = 0; + } + + reg_t set_vl(int rd, int rs1, reg_t reqVL, reg_t newType); + + reg_t get_vlen() { return VLEN; } + reg_t get_elen() { return ELEN; } + reg_t get_slen() { return VLEN; } + + VRM get_vround_mode() { + return (VRM)vxrm; + } + }; + + vectorUnit_t VU; + + + // PULP HW-Loop extension (xpulphwloop) + class hwLoopUnit_t { + public: + processor_t* p; + + bool lp_active[2]; + bool any_active; // set if any group of count, start and end are valid + + hwLoopUnit_t() : + p(0) { + any_active = false; + } + + void set_active(int i); // handles exceptions and sets active-flags + + reg_t handle_loops(reg_t pc, reg_t npc, insn_t insn); // returns npc + + // Control and Status Register access (callable by instructions) + // It would also be possible to use p->get_csr here but since + // hwloop doesn't have side effect this is fine and faster + reg_t get_start(int lpNr) { return (lpNr) ? p->state.lpstart1 : p->state.lpstart0; } + reg_t get_end(int lpNr) { return (lpNr) ? p->state.lpend1 : p->state.lpend0; } + reg_t get_count(int lpNr) { return (lpNr) ?
p->state.lpcount1 : p->state.lpcount0; } + + // also used in set_csr() to make sure csr-write commands also + // check for constraints and throw exception + // setters take insn to throw illegal_instruction + void set_start(int loopNr, reg_t val); + void set_end(int loopNr, reg_t val); + void set_count(int loopNr, reg_t val); + }; + hwLoopUnit_t hwLoops; + }; reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc); diff --git a/riscv/remote_bitbang.cc b/riscv/remote_bitbang.cc index 21306dd166..8453e85abd 100644 --- a/riscv/remote_bitbang.cc +++ b/riscv/remote_bitbang.cc @@ -5,6 +5,13 @@ #include #include +#ifndef AF_INET +#include +#endif +#ifndef INADDR_ANY +#include +#endif + #include #include #include diff --git a/riscv/riscv.ac b/riscv/riscv.ac index 68bcdb55d1..64693e9144 100644 --- a/riscv/riscv.ac +++ b/riscv/riscv.ac @@ -6,21 +6,24 @@ AC_ARG_WITH(isa, AC_DEFINE_UNQUOTED([DEFAULT_ISA], "$withval", [Default value for --isa switch]), AC_DEFINE_UNQUOTED([DEFAULT_ISA], "RV64IMAFDC", [Default value for --isa switch])) -AC_SEARCH_LIBS([dlopen], [dl dld], [], [ - AC_MSG_ERROR([unable to find the dlopen() function]) +AC_ARG_WITH(priv, + [AS_HELP_STRING([--with-priv=MSU], + [Sets the default RISC-V privilege modes supported])], + AC_DEFINE_UNQUOTED([DEFAULT_PRIV], "$withval", [Default value for --priv switch]), + AC_DEFINE_UNQUOTED([DEFAULT_PRIV], "MSU", [Default value for --priv switch])) + +AC_ARG_WITH(varch, + [AS_HELP_STRING([--with-varch=vlen:128,elen:64,slen:128], + [Sets the default vector config])], + AC_DEFINE_UNQUOTED([DEFAULT_VARCH], "$withval", [Default value for --varch switch]), + AC_DEFINE_UNQUOTED([DEFAULT_VARCH], ["vlen:128,elen:64,slen:128"], [Default value for --varch switch])) + + +AC_SEARCH_LIBS([dlopen], [dl dld], [ + AC_DEFINE([HAVE_DLOPEN], [], [Dynamic library loading is supported]), + AC_SUBST([HAVE_DLOPEN], [yes]) ]) -AC_ARG_WITH([fesvr], - [AS_HELP_STRING([--with-fesvr], - [path to your fesvr installation if not in a standard 
location])], - [ - LDFLAGS="-L$withval/lib $LDFLAGS" - CPPFLAGS="-I$withval/include $CPPFLAGS" - ] -) - -AC_CHECK_LIB(fesvr, libfesvr_is_present, [], [AC_MSG_ERROR([libfesvr is required])], [-pthread]) - AC_CHECK_LIB(pthread, pthread_create, [], [AC_MSG_ERROR([libpthread is required])]) AC_ARG_ENABLE([commitlog], AS_HELP_STRING([--enable-commitlog], [Enable commit log generation])) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 80755e711c..d547a1efac 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -2,10 +2,13 @@ get_insn_list = $(shell grep ^DECLARE_INSN $(1) | sed 's/DECLARE_INSN(\(.*\),.*, get_opcode = $(shell grep ^DECLARE_INSN.*\\\<$(2)\\\> $(1) | sed 's/DECLARE_INSN(.*,\(.*\),.*)/\1/') riscv_subproject_deps = \ + fdt \ softfloat \ riscv_install_prog_srcs = \ +riscv_CFLAGS = -fPIC + riscv_hdrs = \ common.h \ decode.h \ @@ -20,16 +23,18 @@ riscv_hdrs = \ encoding.h \ cachesim.h \ memtracer.h \ + mmio_plugin.h \ tracer.h \ extension.h \ rocc.h \ insn_template.h \ - mulhi.h \ debug_module.h \ debug_rom_defines.h \ remote_bitbang.h \ jtag_dtm.h \ +riscv_install_hdrs = mmio_plugin.h + riscv_precompiled_hdrs = \ insn_template.h \ @@ -46,7 +51,6 @@ riscv_srcs = \ extension.cc \ extensions.cc \ rocc.cc \ - regnames.cc \ devices.cc \ rom.cc \ clint.cc \ @@ -61,11 +65,61 @@ riscv_gen_hdrs = \ icache.h \ insn_list.h \ -riscv_insn_list = \ + +riscv_insn_ext_i = \ add \ addi \ addiw \ addw \ + and \ + andi \ + auipc \ + beq \ + bge \ + bgeu \ + blt \ + bltu \ + bne \ + jal \ + jalr \ + lb \ + lbu \ + ld \ + lh \ + lhu \ + lui \ + lw \ + lwu \ + or \ + ori \ + sb \ + sd \ + sh \ + sll \ + slli \ + slliw \ + sllw \ + slt \ + slti \ + sltiu \ + sltu \ + sra \ + srai \ + sraiw \ + sraw \ + srl \ + srli \ + srliw \ + srlw \ + sub \ + subw \ + sw \ + xor \ + xori \ + fence \ + fence_i \ + +riscv_insn_ext_a = \ amoadd_d \ amoadd_w \ amoand_d \ @@ -84,18 +138,15 @@ riscv_insn_list = \ amoswap_w \ amoxor_d \ amoxor_w \ - and \ - andi \ - auipc \ - beq \ - bge \ - 
bgeu \ - blt \ - bltu \ - bne \ + lr_d \ + lr_w \ + sc_d \ + sc_w \ + +riscv_insn_ext_c = \ c_add \ - c_addi4spn \ c_addi \ + c_addi4spn \ c_addw \ c_and \ c_andi \ @@ -110,9 +161,9 @@ riscv_insn_list = \ c_fsdsp \ c_fsw \ c_fswsp \ + c_j \ c_jal \ c_jalr \ - c_j \ c_jr \ c_li \ c_lui \ @@ -125,28 +176,60 @@ riscv_insn_list = \ c_srli \ c_sub \ c_subw \ - c_xor \ - csrrc \ - csrrci \ - csrrs \ - csrrsi \ - csrrw \ - csrrwi \ c_sw \ c_swsp \ + c_xor \ + +riscv_insn_ext_m = \ div \ divu \ divuw \ divw \ - dret \ - ebreak \ - ecall \ - fadd_d \ - fadd_q \ + mul \ + mulh \ + mulhsu \ + mulhu \ + mulw \ + rem \ + remu \ + remuw \ + remw \ + +riscv_insn_ext_f = \ fadd_s \ - fclass_d \ - fclass_q \ fclass_s \ + fcvt_l_s \ + fcvt_lu_s \ + fcvt_s_l \ + fcvt_s_lu \ + fcvt_s_w \ + fcvt_s_wu \ + fcvt_w_s \ + fcvt_wu_s \ + fdiv_s \ + feq_s \ + fle_s \ + flt_s \ + flw \ + fmadd_s \ + fmax_s \ + fmin_s \ + fmsub_s \ + fmul_s \ + fmv_w_x \ + fmv_x_w \ + fnmadd_s \ + fnmsub_s \ + fsgnj_s \ + fsgnjn_s \ + fsgnjx_s \ + fsqrt_s \ + fsub_s \ + fsw \ + +riscv_insn_ext_d = \ + fadd_d \ + fclass_d \ fcvt_d_l \ fcvt_d_lu \ fcvt_d_q \ @@ -154,142 +237,952 @@ riscv_insn_list = \ fcvt_d_w \ fcvt_d_wu \ fcvt_l_d \ - fcvt_l_q \ - fcvt_l_s \ fcvt_lu_d \ + fcvt_s_d \ + fcvt_w_d \ + fcvt_wu_d \ + fdiv_d \ + feq_d \ + fld \ + fle_d \ + flt_d \ + fmadd_d \ + fmax_d \ + fmin_d \ + fmsub_d \ + fmul_d \ + fmv_d_x \ + fmv_x_d \ + fnmadd_d \ + fnmsub_d \ + fsd \ + fsgnj_d \ + fsgnjn_d \ + fsgnjx_d \ + fsqrt_d \ + fsub_d \ + +riscv_insn_ext_zfh = \ + fadd_h \ + fclass_h \ + fcvt_l_h \ + fcvt_lu_h \ + fcvt_d_h \ + fcvt_h_d \ + fcvt_h_l \ + fcvt_h_lu \ + #fcvt_h_q \ + fcvt_h_s \ + fcvt_h_w \ + fcvt_h_wu \ + #fcvt_q_h \ + fcvt_s_h \ + fcvt_w_h \ + fcvt_wu_h \ + fdiv_h \ + feq_h \ + fle_h \ + flh \ + flt_h \ + fmadd_h \ + fmax_h \ + fmin_h \ + fmsub_h \ + fmul_h \ + fmv_h_x \ + fmv_x_h \ + fnmadd_h \ + fnmsub_h \ + fsgnj_h \ + fsgnjn_h \ + fsgnjx_h \ + fsh \ + fsqrt_h \ + fsub_h \ + +riscv_insn_ext_q = \ + 
fadd_q \ + fclass_q \ + fcvt_l_q \ fcvt_lu_q \ - fcvt_lu_s \ fcvt_q_d \ fcvt_q_l \ fcvt_q_lu \ fcvt_q_s \ fcvt_q_w \ fcvt_q_wu \ - fcvt_s_d \ - fcvt_s_l \ - fcvt_s_lu \ fcvt_s_q \ - fcvt_s_w \ - fcvt_s_wu \ - fcvt_w_d \ fcvt_w_q \ - fcvt_w_s \ - fcvt_wu_d \ fcvt_wu_q \ - fcvt_wu_s \ - fdiv_d \ fdiv_q \ - fdiv_s \ - fence \ - fence_i \ - feq_d \ feq_q \ - feq_s \ - fld \ - fle_d \ fle_q \ - fle_s \ flq \ - flt_d \ flt_q \ - flt_s \ - flw \ - fmadd_d \ fmadd_q \ - fmadd_s \ - fmax_d \ fmax_q \ - fmax_s \ - fmin_d \ fmin_q \ - fmin_s \ - fmsub_d \ fmsub_q \ - fmsub_s \ - fmul_d \ fmul_q \ - fmul_s \ - fmv_d_x \ - fmv_w_x \ - fmv_x_d \ - fmv_x_w \ - fnmadd_d \ fnmadd_q \ - fnmadd_s \ - fnmsub_d \ fnmsub_q \ - fnmsub_s \ - fsd \ - fsgnj_d \ fsgnj_q \ - fsgnjn_d \ fsgnjn_q \ - fsgnjn_s \ - fsgnj_s \ - fsgnjx_d \ fsgnjx_q \ - fsgnjx_s \ fsq \ - fsqrt_d \ fsqrt_q \ - fsqrt_s \ - fsub_d \ fsub_q \ - fsub_s \ - fsw \ - jal \ - jalr \ - lb \ - lbu \ - ld \ - lh \ - lhu \ - lr_d \ - lr_w \ - lui \ - lw \ - lwu \ + +# Disabled riscv_insn_ext_v_alu_int instructions for opcode overlap: +#vasubu_vx +#vslide1up_vx +#vaaddu_vx +#vadc_vvm +#vadc_vxm +#vsbc_vvm +#vsbc_vxm +#vmulhu_vx +#vdivu_vx +#vmulhsu_vx + +riscv_insn_ext_v_alu_int = \ + vaadd_vv \ + vaaddu_vv \ + vaadd_vx \ + vadc_vim \ + vadd_vi \ + vadd_vv \ + vadd_vx \ + vand_vi \ + vand_vv \ + vand_vx \ + vasub_vv \ + vasubu_vv \ + vasub_vx \ + vcompress_vm \ + vdiv_vv \ + vdiv_vx \ + vdivu_vv \ + vdot_vv \ + vdotu_vv \ + vid_v \ + viota_m \ + vmacc_vv \ + vmacc_vx \ + vmadc_vim \ + vmadc_vvm \ + vmadc_vxm \ + vmadd_vv \ + vmadd_vx \ + vmand_mm \ + vmandnot_mm \ + vmax_vv \ + vmax_vx \ + vmaxu_vv \ + vmaxu_vx \ + vmerge_vim \ + vmerge_vvm \ + vmerge_vxm \ + vfirst_m \ + vmin_vv \ + vmin_vx \ + vminu_vv \ + vminu_vx \ + vmnand_mm \ + vmnor_mm \ + vmor_mm \ + vmornot_mm \ + vpopc_m \ + vmsbc_vvm \ + vmsbc_vxm \ + vmsbf_m \ + vmseq_vi \ + vmseq_vv \ + vmseq_vx \ + vmsgt_vi \ + vmsgt_vx \ + vmsgtu_vi \ + vmsgtu_vx \ + vmsif_m \ + 
vmsle_vi \ + vmsle_vv \ + vmsle_vx \ + vmsleu_vi \ + vmsleu_vv \ + vmsleu_vx \ + vmslt_vv \ + vmslt_vx \ + vmsltu_vv \ + vmsltu_vx \ + vmsne_vi \ + vmsne_vv \ + vmsne_vx \ + vmsof_m \ + vmul_vv \ + vmul_vx \ + vmulh_vv \ + vmulh_vx \ + vmulhsu_vv \ + vmulhu_vv \ + vmv_s_x \ + vmv_v_i \ + vmv_v_v \ + vmv_v_x \ + vmv_x_s \ + vmv1r_v \ + vmv2r_v \ + vmv4r_v \ + vmv8r_v \ + vmxnor_mm \ + vmxor_mm \ + vnclip_wi \ + vnclip_wv \ + vnclip_wx \ + vnclipu_wi \ + vnclipu_wv \ + vnclipu_wx \ + vnmsac_vv \ + vnmsac_vx \ + vnmsub_vv \ + vnmsub_vx \ + vnsra_wi \ + vnsra_wv \ + vnsra_wx \ + vnsrl_wi \ + vnsrl_wv \ + vnsrl_wx \ + vor_vi \ + vor_vv \ + vor_vx \ + vredand_vs \ + vredmax_vs \ + vredmaxu_vs \ + vredmin_vs \ + vredminu_vs \ + vredor_vs \ + vredsum_vs \ + vredxor_vs \ + vrem_vv \ + vrem_vx \ + vremu_vv \ + vremu_vx \ + vrgather_vi \ + vrgather_vv \ + vrgather_vx \ + vrgatherei16_vv \ + vrsub_vi \ + vrsub_vx \ + vsadd_vi \ + vsadd_vv \ + vsadd_vx \ + vsaddu_vi \ + vsaddu_vv \ + vsaddu_vx \ + vsext_vf2 \ + vsext_vf4 \ + vsext_vf8 \ + vslide1down_vx \ + vslidedown_vi \ + vslidedown_vx \ + vslideup_vi \ + vslideup_vx \ + vsll_vi \ + vsll_vv \ + vsll_vx \ + vsmul_vv \ + vsmul_vx \ + vsra_vi \ + vsra_vv \ + vsra_vx \ + vsrl_vi \ + vsrl_vv \ + vsrl_vx \ + vssra_vi \ + vssra_vv \ + vssra_vx \ + vssrl_vi \ + vssrl_vv \ + vssrl_vx \ + vssub_vv \ + vssub_vx \ + vssubu_vv \ + vssubu_vx \ + vsub_vv \ + vsub_vx \ + vwadd_vv \ + vwadd_vx \ + vwadd_wv \ + vwadd_wx \ + vwaddu_vv \ + vwaddu_vx \ + vwaddu_wv \ + vwaddu_wx \ + vwmacc_vv \ + vwmacc_vx \ + vwmaccsu_vv \ + vwmaccsu_vx \ + vwmaccu_vv \ + vwmaccu_vx \ + vwmaccus_vx \ + vwmul_vv \ + vwmul_vx \ + vwmulsu_vv \ + vwmulsu_vx \ + vwmulu_vv \ + vwmulu_vx \ + vwredsum_vs \ + vwredsumu_vs \ + vwsub_vv \ + vwsub_vx \ + vwsub_wv \ + vwsub_wx \ + vwsubu_vv \ + vwsubu_vx \ + vwsubu_wv \ + vwsubu_wx \ + vxor_vi \ + vxor_vv \ + vxor_vx \ + vzext_vf2 \ + vzext_vf4 \ + vzext_vf8 \ + +# Disabled riscv_insn_ext_v_alu_fp instructions for opcode 
overlap: +#vfcvt_x_f_v + +riscv_insn_ext_v_alu_fp = \ + vfadd_vf \ + vfadd_vv \ + vfclass_v \ + vfcvt_f_x_v \ + vfcvt_f_xu_v \ + vfcvt_rtz_x_f_v \ + vfcvt_rtz_xu_f_v \ + vfcvt_xu_f_v \ + vfdiv_vf \ + vfdiv_vv \ + vfdot_vv \ + vfmacc_vf \ + vfmacc_vv \ + vfmadd_vf \ + vfmadd_vv \ + vfmax_vf \ + vfmax_vv \ + vfmerge_vfm \ + vfmin_vf \ + vfmin_vv \ + vfmsac_vf \ + vfmsac_vv \ + vfmsub_vf \ + vfmsub_vv \ + vfmul_vf \ + vfmul_vv \ + vfmv_f_s \ + vfmv_s_f \ + vfmv_v_f \ + vfncvt_f_f_w \ + vfncvt_f_x_w \ + vfncvt_f_xu_w \ + vfncvt_rod_f_f_w \ + vfncvt_rtz_x_f_w \ + vfncvt_rtz_xu_f_w \ + vfncvt_x_f_w \ + vfncvt_xu_f_w \ + vfnmacc_vf \ + vfnmacc_vv \ + vfnmadd_vf \ + vfnmadd_vv \ + vfnmsac_vf \ + vfnmsac_vv \ + vfnmsub_vf \ + vfnmsub_vv \ + vfrdiv_vf \ + vfredmax_vs \ + vfredmin_vs \ + vfredosum_vs \ + vfredsum_vs \ + vfrece7_v \ + vfrsub_vf \ + vfrsqrte7_v \ + vfsgnj_vf \ + vfsgnj_vv \ + vfsgnjn_vf \ + vfsgnjn_vv \ + vfsgnjx_vf \ + vfsgnjx_vv \ + vfsqrt_v \ + vfslide1down_vf \ + vfslide1up_vf \ + vfsub_vf \ + vfsub_vv \ + vfwadd_vf \ + vfwadd_vv \ + vfwadd_wf \ + vfwadd_wv \ + vfwcvt_f_f_v \ + vfwcvt_f_x_v \ + vfwcvt_f_xu_v \ + vfwcvt_rtz_x_f_v \ + vfwcvt_rtz_xu_f_v \ + vfwcvt_x_f_v \ + vfwcvt_xu_f_v \ + vfwmacc_vf \ + vfwmacc_vv \ + vfwmsac_vf \ + vfwmsac_vv \ + vfwmul_vf \ + vfwmul_vv \ + vfwnmacc_vf \ + vfwnmacc_vv \ + vfwnmsac_vf \ + vfwnmsac_vv \ + vfwredosum_vs \ + vfwredsum_vs \ + vfwsub_vf \ + vfwsub_vv \ + vfwsub_wf \ + vfwsub_wv \ + vmfeq_vf \ + vmfeq_vv \ + vmfge_vf \ + vmfgt_vf \ + vmfle_vf \ + vmfle_vv \ + vmflt_vf \ + vmflt_vv \ + vmfne_vf \ + vmfne_vv \ + +riscv_insn_ext_v_amo = \ + vamoswapei8_v \ + vamoaddei8_v \ + vamoandei8_v \ + vamomaxei8_v \ + vamomaxuei8_v \ + vamominei8_v \ + vamominuei8_v \ + vamoorei8_v \ + vamoxorei8_v \ + vamoswapei16_v \ + vamoaddei16_v \ + vamoandei16_v \ + vamomaxei16_v \ + vamomaxuei16_v \ + vamominei16_v \ + vamominuei16_v \ + vamoorei16_v \ + vamoxorei16_v \ + vamoswapei32_v \ + vamoaddei32_v \ + vamoandei32_v \ + 
vamomaxei32_v \ + vamomaxuei32_v \ + vamominei32_v \ + vamominuei32_v \ + vamoorei32_v \ + vamoxorei32_v \ + vamoswapei64_v \ + vamoaddei64_v \ + vamoandei64_v \ + vamomaxei64_v \ + vamomaxuei64_v \ + vamominei64_v \ + vamominuei64_v \ + vamoorei64_v \ + vamoxorei64_v \ + +riscv_insn_ext_v_ldst = \ + vle8_v \ + vle16_v \ + vle32_v \ + vle64_v \ + vlse8_v \ + vlse16_v \ + vlse32_v \ + vlse64_v \ + vlxei8_v \ + vlxei16_v \ + vlxei32_v \ + vlxei64_v \ + vle8ff_v \ + vle16ff_v \ + vle32ff_v \ + vle64ff_v \ + vl1re8_v \ + vl2re8_v \ + vl4re8_v \ + vl8re8_v \ + vl1re16_v \ + vl2re16_v \ + vl4re16_v \ + vl8re16_v \ + vl1re32_v \ + vl2re32_v \ + vl4re32_v \ + vl8re32_v \ + vl1re64_v \ + vl2re64_v \ + vl4re64_v \ + vl8re64_v \ + vse8_v \ + vse16_v \ + vse32_v \ + vse64_v \ + vsse8_v \ + vsse16_v \ + vsse32_v \ + vsse64_v \ + vsxei8_v \ + vsxei16_v \ + vsxei32_v \ + vsxei64_v \ + vsuxei8_v \ + vsuxei16_v \ + vsuxei32_v \ + vsuxei64_v \ + vs1r_v \ + vs2r_v \ + vs4r_v \ + vs8r_v \ + +# Disabled riscv_insn_ext_v_ctrl instructions for opcode overlap: +#vsetvl + +riscv_insn_ext_v_ctrl = \ + vsetvli \ + +riscv_insn_ext_v = \ + $(riscv_insn_ext_v_alu_fp) \ + $(riscv_insn_ext_v_alu_int) \ + $(riscv_insn_ext_v_amo) \ + $(riscv_insn_ext_v_ctrl) \ + $(riscv_insn_ext_v_ldst) \ + + +riscv_insn_ext_pulphwloop = \ + lp_starti \ + lp_endi \ + lp_count \ + lp_counti \ + lp_setup \ + lp_setupi \ + +riscv_insn_ext_pulppostmod = \ + p_lb_irpost \ + p_lbu_irpost \ + p_lh_irpost \ + p_lhu_irpost \ + p_lw_irpost \ + p_lb_rrpost \ + p_lbu_rrpost \ + p_lh_rrpost \ + p_lhu_rrpost \ + p_lw_rrpost \ + p_lb_rr \ + p_lbu_rr \ + p_lh_rr \ + p_lhu_rr \ + p_lw_rr \ + p_sb_irpost \ + p_sh_irpost \ + p_sw_irpost \ + p_sb_rrpost \ + p_sh_rrpost \ + p_sw_rrpost \ + p_sb_rr \ + p_sh_rr \ + p_sw_rr \ + +riscv_insn_ext_pulpabs = \ + p_abs \ + +riscv_insn_ext_pulpslet = \ + p_slet \ + p_sletu \ + +riscv_insn_ext_pulpmacsi = \ + p_mac \ + p_msu \ + +riscv_insn_ext_pulpmulrnhi = \ + p_mulsN \ + p_mulsRN \ + p_muluN \ 
+ p_muluRN \ + p_mulhhsN \ + p_mulhhuN \ + p_mulhhsRN \ + p_mulhhuRN \ + +riscv_insn_ext_pulpmacrnhi = \ + p_macsN \ + p_macuN \ + p_macsRN \ + p_macuRN \ + p_machhsN \ + p_machhuN \ + p_machhsRN \ + p_machhuRN \ + +riscv_insn_ext_pulppartmac = \ + p_macs \ + p_macu \ + p_machhs \ + p_machhu \ + +riscv_insn_ext_pulpminmax = \ + p_min \ + p_max \ + p_minu \ + p_maxu \ + +riscv_insn_ext_pulpbitopsmall = \ + p_cnt \ + p_clb \ + p_fl1 \ + p_ff1 \ + p_ror \ + p_exths \ + p_exthz \ + p_extbs \ + p_extbz \ + +riscv_insn_ext_pulpbitop = \ + $(riscv_insn_ext_pulpbitopsmall) \ + p_extract \ + p_extractr \ + p_extractu \ + p_extractur \ + p_insert \ + p_insertr \ + p_bset \ + p_bsetr \ + p_bclr \ + p_bclrr \ + +riscv_insn_ext_pulpvect = \ + pv_add_h \ + pv_add_sc_h \ + pv_add_sci_h \ + pv_add_b \ + pv_add_sc_b \ + pv_add_sci_b \ + pv_add_h_div2 \ + pv_add_h_div4 \ + pv_add_h_div8 \ + pv_sub_h \ + pv_sub_sc_h \ + pv_sub_sci_h \ + pv_sub_b \ + pv_sub_sc_b \ + pv_sub_sci_b \ + pv_sub_h_div2 \ + pv_sub_h_div4 \ + pv_sub_h_div8 \ + pv_avg_h \ + pv_avg_sc_h \ + pv_avg_sci_h \ + pv_avg_b \ + pv_avg_sc_b \ + pv_avg_sci_b \ + pv_avgu_h \ + pv_avgu_sc_h \ + pv_avgu_sci_h \ + pv_avgu_b \ + pv_avgu_sc_b \ + pv_avgu_sci_b \ + pv_min_h \ + pv_min_sc_h \ + pv_min_sci_h \ + pv_min_b \ + pv_min_sc_b \ + pv_min_sci_b \ + pv_minu_h \ + pv_minu_sc_h \ + pv_minu_sci_h \ + pv_minu_b \ + pv_minu_sc_b \ + pv_minu_sci_b \ + pv_max_h \ + pv_max_sc_h \ + pv_max_sci_h \ + pv_max_b \ + pv_max_sc_b \ + pv_max_sci_b \ + pv_maxu_h \ + pv_maxu_sc_h \ + pv_maxu_sci_h \ + pv_maxu_b \ + pv_maxu_sc_b \ + pv_maxu_sci_b \ + pv_srl_h \ + pv_srl_sc_h \ + pv_srl_sci_h \ + pv_srl_b \ + pv_srl_sc_b \ + pv_srl_sci_b \ + pv_sra_h \ + pv_sra_sc_h \ + pv_sra_sci_h \ + pv_sra_b \ + pv_sra_sc_b \ + pv_sra_sci_b \ + pv_sll_h \ + pv_sll_sc_h \ + pv_sll_sci_h \ + pv_sll_b \ + pv_sll_sc_b \ + pv_sll_sci_b \ + pv_or_h \ + pv_or_sc_h \ + pv_or_sci_h \ + pv_or_b \ + pv_or_sc_b \ + pv_or_sci_b \ + pv_xor_h \ + pv_xor_sc_h \ + 
pv_xor_sci_h \ + pv_xor_b \ + pv_xor_sc_b \ + pv_xor_sci_b \ + pv_and_h \ + pv_and_sc_h \ + pv_and_sci_h \ + pv_and_b \ + pv_and_sc_b \ + pv_and_sci_b \ + pv_abs_h \ + pv_abs_b \ + pv_extract_h \ + pv_extract_b \ + pv_extractu_h \ + pv_extractu_b \ + pv_insert_h \ + pv_insert_b \ + pv_dotup_h \ + pv_dotup_sc_h \ + pv_dotup_sci_h \ + pv_dotup_b \ + pv_dotup_sc_b \ + pv_dotup_sci_b \ + pv_dotusp_h \ + pv_dotusp_sc_h \ + pv_dotusp_sci_h \ + pv_dotusp_b \ + pv_dotusp_sc_b \ + pv_dotusp_sci_b \ + pv_dotsp_h \ + pv_dotsp_sc_h \ + pv_dotsp_sci_h \ + pv_dotsp_b \ + pv_dotsp_sc_b \ + pv_dotsp_sci_b \ + pv_sdotup_h \ + pv_sdotup_sc_h \ + pv_sdotup_sci_h \ + pv_sdotup_b \ + pv_sdotup_sc_b \ + pv_sdotup_sci_b \ + pv_sdotusp_h \ + pv_sdotusp_sc_h \ + pv_sdotusp_sci_h \ + pv_sdotusp_b \ + pv_sdotusp_sc_b \ + pv_sdotusp_sci_b \ + pv_sdotsp_h \ + pv_sdotsp_sc_h \ + pv_sdotsp_sci_h \ + pv_sdotsp_b \ + pv_sdotsp_sc_b \ + pv_sdotsp_sci_b \ + pv_cmpeq_h \ + pv_cmpeq_sc_h \ + pv_cmpeq_sci_h \ + pv_cmpeq_b \ + pv_cmpeq_sc_b \ + pv_cmpeq_sci_b \ + pv_cmpne_h \ + pv_cmpne_sc_h \ + pv_cmpne_sci_h \ + pv_cmpne_b \ + pv_cmpne_sc_b \ + pv_cmpne_sci_b \ + pv_cmpgt_h \ + pv_cmpgt_sc_h \ + pv_cmpgt_sci_h \ + pv_cmpgt_b \ + pv_cmpgt_sc_b \ + pv_cmpgt_sci_b \ + pv_cmpge_h \ + pv_cmpge_sc_h \ + pv_cmpge_sci_h \ + pv_cmpge_b \ + pv_cmpge_sc_b \ + pv_cmpge_sci_b \ + pv_cmplt_h \ + pv_cmplt_sc_h \ + pv_cmplt_sci_h \ + pv_cmplt_b \ + pv_cmplt_sc_b \ + pv_cmplt_sci_b \ + pv_cmple_h \ + pv_cmple_sc_h \ + pv_cmple_sci_h \ + pv_cmple_b \ + pv_cmple_sc_b \ + pv_cmple_sci_b \ + pv_cmpgtu_h \ + pv_cmpgtu_sc_h \ + pv_cmpgtu_sci_h \ + pv_cmpgtu_b \ + pv_cmpgtu_sc_b \ + pv_cmpgtu_sci_b \ + pv_cmpgeu_h \ + pv_cmpgeu_sc_h \ + pv_cmpgeu_sci_h \ + pv_cmpgeu_b \ + pv_cmpgeu_sc_b \ + pv_cmpgeu_sci_b \ + pv_cmpltu_h \ + pv_cmpltu_sc_h \ + pv_cmpltu_sci_h \ + pv_cmpltu_b \ + pv_cmpltu_sc_b \ + pv_cmpltu_sci_b \ + pv_cmpleu_h \ + pv_cmpleu_sc_h \ + pv_cmpleu_sci_h \ + pv_cmpleu_b \ + pv_cmpleu_sc_b \ + pv_cmpleu_sci_b \ 
+ +riscv_insn_ext_pulpvectcomplex = \ + pv_cplxconj_h \ + pv_subrotmj_h \ + pv_subrotmj_h_div2 \ + pv_subrotmj_h_div4 \ + pv_subrotmj_h_div8 \ + pv_cplxmul_h_r \ + pv_cplxmul_h_r_div2 \ + pv_cplxmul_h_r_div4 \ + pv_cplxmul_h_r_div8 \ + pv_cplxmul_h_i \ + pv_cplxmul_h_i_div2 \ + pv_cplxmul_h_i_div4 \ + pv_cplxmul_h_i_div8 \ + +riscv_insn_ext_pulpvectshufflepack = \ + pv_shuffle_h \ + pv_shuffle_sci_h \ + pv_shuffle_b \ + pv_shufflei0_sci_b \ + pv_shufflei1_sci_b \ + pv_shufflei2_sci_b \ + pv_shufflei3_sci_b \ + pv_shuffle2_h \ + pv_shuffle2_b \ + pv_pack \ + pv_pack_h \ + pv_packhi_b \ + pv_packlo_b \ + +riscv_insn_ext_pulpclip = \ + p_clip \ + p_clipu \ + p_clipr \ + p_clipur \ + +riscv_insn_ext_pulpaddsubrn = \ + p_addN \ + p_adduN \ + p_addRN \ + p_adduRN \ + p_subN \ + p_subuN \ + p_subRN \ + p_subuRN \ + p_addNr \ + p_adduNr \ + p_addRNr \ + p_adduRNr \ + p_subNr \ + p_subuNr \ + p_subRNr \ + p_subuRNr \ + +riscv_insn_ext_pulpbr = \ + p_beqimm \ + p_bneimm \ + +riscv_insn_ext_pulpbitrev = \ + p_bitrev \ + +riscv_insn_ext_pulpimg = \ + $(riscv_insn_ext_pulphwloop) \ + $(riscv_insn_ext_pulppostmod) \ + $(riscv_insn_ext_pulpabs) \ + $(riscv_insn_ext_pulpslet) \ + $(riscv_insn_ext_pulpmacsi) \ + $(riscv_insn_ext_pulpmulrnhi) \ + $(riscv_insn_ext_pulpmacrnhi) \ + $(riscv_insn_ext_pulpminmax) \ + $(riscv_insn_ext_pulpbitop) \ + $(riscv_insn_ext_pulpvect) \ + $(riscv_insn_ext_pulpvectcomplex) \ + $(riscv_insn_ext_pulpvectshufflepack) \ + $(riscv_insn_ext_pulpclip) \ + $(riscv_insn_ext_pulpaddsubrn) \ + $(riscv_insn_ext_pulpbr) \ + $(riscv_insn_ext_pulpbitrev) \ +# $(riscv_insn_ext_pulppartmac) \ + + +riscv_insn_ext_h = \ + hfence_gvma \ + hfence_vvma \ + hlv_b \ + hlv_bu \ + hlv_h \ + hlv_hu \ + hlvx_hu \ + hlv_w \ + hlv_wu \ + hlvx_wu \ + hlv_d \ + hsv_b \ + hsv_h \ + hsv_w \ + hsv_d \ + +riscv_insn_priv = \ + csrrc \ + csrrci \ + csrrs \ + csrrsi \ + csrrw \ + csrrwi \ + dret \ + ebreak \ + ecall \ mret \ - mul \ - mulh \ - mulhsu \ - mulhu \ - mulw \ - or \ - ori \ 
- rem \ - remu \ - remuw \ - remw \ - sb \ - sc_d \ - sc_w \ - sd \ sfence_vma \ - sh \ - sll \ - slli \ - slliw \ - sllw \ - slt \ - slti \ - sltiu \ - sltu \ - sra \ - srai \ - sraiw \ - sraw \ sret \ - srl \ - srli \ - srliw \ - srlw \ - sub \ - subw \ - sw \ wfi \ - xor \ - xori \ + + +riscv_insn_list = \ + $(riscv_insn_ext_a) \ + $(riscv_insn_ext_c) \ + $(riscv_insn_ext_i) \ + $(riscv_insn_ext_m) \ + $(riscv_insn_ext_f) \ + $(riscv_insn_ext_d) \ + $(riscv_insn_ext_zfh) \ + $(riscv_insn_ext_q) \ + $(riscv_insn_ext_pulpimg) \ + $(riscv_insn_ext_h) \ + $(riscv_insn_priv) \ + # $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ riscv_gen_srcs = \ $(addsuffix .cc,$(riscv_insn_list)) @@ -311,3 +1204,4 @@ $(riscv_gen_srcs): %.cc: insns/%.h insn_template.cc riscv_junk = \ $(riscv_gen_srcs) \ + diff --git a/riscv/sim.cc b/riscv/sim.cc index 44223a7d90..76bb3cdff0 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -4,6 +4,8 @@ #include "mmu.h" #include "dts.h" #include "remote_bitbang.h" +#include "byteorder.h" +#include #include #include #include @@ -24,42 +26,79 @@ static void handle_signal(int sig) signal(sig, &handle_signal); } -sim_t::sim_t(const char* isa, size_t nprocs, bool halted, reg_t start_pc, - std::vector> mems, +sim_t::sim_t(const char* isa, const char* priv, const char* varch, + size_t nprocs, bool halted, bool real_time_clint, + reg_t initrd_start, reg_t initrd_end, const char* bootargs, + reg_t start_pc, std::vector> mems, + std::vector> plugin_devices, const std::vector& args, - std::vector const hartids, unsigned progsize, - unsigned max_bus_master_bits, bool require_authentication) - : htif_t(args), mems(mems), procs(std::max(nprocs, size_t(1))), - start_pc(start_pc), current_step(0), current_proc(0), debug(false), - histogram_enabled(false), dtb_enabled(true), remote_bitbang(NULL), - debug_module(this, progsize, max_bus_master_bits, require_authentication) + std::vector const hartids, + const debug_module_config_t &dm_config, + const char *log_path, + bool 
dtb_enabled, const char *dtb_file) + : htif_t(args), + mems(mems), + plugin_devices(plugin_devices), + procs(std::max(nprocs, size_t(1))), + initrd_start(initrd_start), + initrd_end(initrd_end), + bootargs(bootargs), + start_pc(start_pc), + dtb_file(dtb_file ? dtb_file : ""), + dtb_enabled(dtb_enabled), + log_file(log_path), + current_step(0), + current_proc(0), + debug(false), + histogram_enabled(false), + log(false), + remote_bitbang(NULL), + debug_module(this, dm_config) { signal(SIGINT, &handle_signal); for (auto& x : mems) bus.add_device(x.first, x.second); + for (auto& x : plugin_devices) + bus.add_device(x.first, x.second); + debug_module.add_device(&bus); debug_mmu = new mmu_t(this, NULL); - if (hartids.size() == 0) { - for (size_t i = 0; i < procs.size(); i++) { - procs[i] = new processor_t(isa, this, i, halted); - } - } - else { - if (hartids.size() != procs.size()) { - std::cerr << "Number of specified hartids doesn't match number of processors" << strerror(errno) << std::endl; + if (! (hartids.empty() || hartids.size() == nprocs)) { + std::cerr << "Number of specified hartids (" + << hartids.size() + << ") doesn't match number of processors (" + << nprocs << ").\n"; exit(1); - } - for (size_t i = 0; i < procs.size(); i++) { - procs[i] = new processor_t(isa, this, hartids[i], halted); - } } - clint.reset(new clint_t(procs)); - bus.add_device(CLINT_BASE, clint.get()); + for (size_t i = 0; i < nprocs; i++) { + int hart_id = hartids.empty() ? 
i : hartids[i]; + procs[i] = new processor_t(isa, priv, varch, this, hart_id, halted, + log_file.get()); + } + + make_dtb(); + + clint.reset(new clint_t(procs, CPU_HZ / INSNS_PER_RTC_TICK, real_time_clint)); + reg_t clint_base; + if (fdt_parse_clint((void *)dtb.c_str(), &clint_base, "riscv,clint0")) { + bus.add_device(CLINT_BASE, clint.get()); + } else { + bus.add_device(clint_base, clint.get()); + } + + for (size_t i = 0; i < nprocs; i++) { + reg_t pmp_num = 0, pmp_granularity = 0; + fdt_parse_pmp_num((void *)dtb.c_str(), &pmp_num, "riscv"); + fdt_parse_pmp_alignment((void *)dtb.c_str(), &pmp_granularity, "riscv"); + + procs[i]->set_pmp_num(pmp_num); + procs[i]->set_pmp_granularity(pmp_granularity); + } } sim_t::~sim_t() @@ -125,11 +164,6 @@ void sim_t::set_debug(bool value) debug = value; } -void sim_t::set_log(bool value) -{ - log = value; -} - void sim_t::set_histogram(bool value) { histogram_enabled = value; @@ -138,27 +172,71 @@ void sim_t::set_histogram(bool value) } } +void sim_t::configure_log(bool enable_log, bool enable_commitlog) +{ + log = enable_log; + + if (!enable_commitlog) + return; + +#ifndef RISCV_ENABLE_COMMITLOG + fputs("Commit logging support has not been properly enabled; " + "please re-build the riscv-isa-sim project using " + "\"configure --enable-commitlog\".\n", + stderr); + abort(); +#else + for (processor_t *proc : procs) { + proc->enable_log_commits(); + } +#endif +} + void sim_t::set_procs_debug(bool value) { for (size_t i=0; i< procs.size(); i++) procs[i]->set_debug(value); } +static bool paddr_ok(reg_t addr) +{ + return (addr >> MAX_PADDR_BITS) == 0; +} + bool sim_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { - if (addr + len < addr) + if (addr + len < addr || !paddr_ok(addr + len - 1)) return false; return bus.load(addr, len, bytes); } bool sim_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) { - if (addr + len < addr) + if (addr + len < addr || !paddr_ok(addr + len - 1)) return false; return bus.store(addr, 
len, bytes); } void sim_t::make_dtb() +{ + if (!dtb_file.empty()) { + std::ifstream fin(dtb_file.c_str(), std::ios::binary); + if (!fin.good()) { + std::cerr << "can't find dtb file: " << dtb_file << std::endl; + exit(-1); + } + + std::stringstream strstream; + strstream << fin.rdbuf(); + + dtb = strstream.str(); + } else { + dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_start, initrd_end, bootargs, procs, mems); + dtb = dts_compile(dts); + } +} + +void sim_t::set_rom() { const int reset_vec_size = 8; @@ -176,11 +254,27 @@ void sim_t::make_dtb() (uint32_t) (start_pc & 0xffffffff), (uint32_t) (start_pc >> 32) }; + for(int i = 0; i < reset_vec_size; i++) + reset_vec[i] = to_le(reset_vec[i]); std::vector rom((char*)reset_vec, (char*)reset_vec + sizeof(reset_vec)); - dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, procs, mems); - std::string dtb = dts_compile(dts); + std::string dtb; + if (!dtb_file.empty()) { + std::ifstream fin(dtb_file.c_str(), std::ios::binary); + if (!fin.good()) { + std::cerr << "can't find dtb file: " << dtb_file << std::endl; + exit(-1); + } + + std::stringstream strstream; + strstream << fin.rdbuf(); + + dtb = strstream.str(); + } else { + dts = make_dts(INSNS_PER_RTC_TICK, CPU_HZ, initrd_start, initrd_end, bootargs, procs, mems); + dtb = dts_compile(dts); + } rom.insert(rom.end(), dtb.begin(), dtb.end()); const int align = 0x1000; @@ -191,6 +285,8 @@ void sim_t::make_dtb() } char* sim_t::addr_to_mem(reg_t addr) { + if (!paddr_ok(addr)) + return NULL; auto desc = bus.find_device(addr); if (auto mem = dynamic_cast(desc.second)) if (addr - desc.first < mem->size()) @@ -198,12 +294,17 @@ char* sim_t::addr_to_mem(reg_t addr) { return NULL; } +const char* sim_t::get_symbol(uint64_t addr) +{ + return htif_t::get_symbol(addr); +} + // htif void sim_t::reset() { if (dtb_enabled) - make_dtb(); + set_rom(); } void sim_t::idle() @@ -214,7 +315,7 @@ void sim_t::idle() void sim_t::read_chunk(addr_t taddr, size_t len, void* dst) { assert(len == 8); - auto 
data = debug_mmu->load_uint64(taddr); + auto data = to_le(debug_mmu->load_uint64(taddr)); memcpy(dst, &data, sizeof data); } @@ -223,7 +324,7 @@ void sim_t::write_chunk(addr_t taddr, size_t len, const void* src) assert(len == 8); uint64_t data; memcpy(&data, src, sizeof data); - debug_mmu->store_uint64(taddr, data); + debug_mmu->store_uint64(taddr, from_le(data)); } void sim_t::proc_reset(unsigned id) diff --git a/riscv/sim.h b/riscv/sim.h index e42808b4ae..c7e3de4f71 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -3,15 +3,18 @@ #ifndef _RISCV_SIM_H #define _RISCV_SIM_H -#include "processor.h" -#include "devices.h" #include "debug_module.h" +#include "devices.h" +#include "log_file.h" +#include "processor.h" #include "simif.h" + #include #include #include #include #include +#include class mmu_t; class remote_bitbang_t; @@ -20,21 +23,30 @@ class remote_bitbang_t; class sim_t : public htif_t, public simif_t { public: - sim_t(const char* isa, size_t _nprocs, bool halted, reg_t start_pc, - std::vector> mems, + sim_t(const char* isa, const char* priv, const char* varch, size_t _nprocs, + bool halted, bool real_time_clint, + reg_t initrd_start, reg_t initrd_end, const char* bootargs, + reg_t start_pc, std::vector> mems, + std::vector> plugin_devices, const std::vector& args, const std::vector hartids, - unsigned progsize, unsigned max_bus_master_bits, bool require_authentication); + const debug_module_config_t &dm_config, const char *log_path, + bool dtb_enabled, const char *dtb_file); ~sim_t(); // run the simulation to completion int run(); void set_debug(bool value); - void set_log(bool value); void set_histogram(bool value); + + // Configure logging + // + // If enable_log is true, an instruction trace will be generated. If + // enable_commitlog is true, so will the commit results (if this + // build was configured without support for commit logging, the + // function will print an error message and abort). 
+ void configure_log(bool enable_log, bool enable_commitlog); + void set_procs_debug(bool value); - void set_dtb_enabled(bool value) { - this->dtb_enabled = value; - } void set_remote_bitbang(remote_bitbang_t* remote_bitbang) { this->remote_bitbang = remote_bitbang; } @@ -47,13 +59,21 @@ class sim_t : public htif_t, public simif_t private: std::vector> mems; + std::vector> plugin_devices; mmu_t* debug_mmu; // debug port into main memory std::vector procs; + reg_t initrd_start; + reg_t initrd_end; + const char* bootargs; reg_t start_pc; std::string dts; + std::string dtb; + std::string dtb_file; + bool dtb_enabled; std::unique_ptr boot_rom; std::unique_ptr clint; bus_t bus; + log_file_t log_file; processor_t* get_core(const std::string& i); void step(size_t n); // step through simulation @@ -63,9 +83,8 @@ class sim_t : public htif_t, public simif_t size_t current_step; size_t current_proc; bool debug; - bool log; bool histogram_enabled; // provide a histogram of PCs - bool dtb_enabled; + bool log; remote_bitbang_t* remote_bitbang; // memory-mapped I/O routines @@ -73,6 +92,9 @@ class sim_t : public htif_t, public simif_t bool mmio_load(reg_t addr, size_t len, uint8_t* bytes); bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes); void make_dtb(); + void set_rom(); + + const char* get_symbol(uint64_t addr); // presents a prompt for introspection into the simulation void interactive(); @@ -83,8 +105,10 @@ class sim_t : public htif_t, public simif_t void interactive_run(const std::string& cmd, const std::vector& args, bool noisy); void interactive_run_noisy(const std::string& cmd, const std::vector& args); void interactive_run_silent(const std::string& cmd, const std::vector& args); + void interactive_vreg(const std::string& cmd, const std::vector& args); void interactive_reg(const std::string& cmd, const std::vector& args); void interactive_freg(const std::string& cmd, const std::vector& args); + void interactive_fregh(const std::string& cmd, const 
std::vector& args); void interactive_fregs(const std::string& cmd, const std::vector& args); void interactive_fregd(const std::string& cmd, const std::vector& args); void interactive_pc(const std::string& cmd, const std::vector& args); diff --git a/riscv/simif.h b/riscv/simif.h index 1d982b3396..0e75d45b16 100644 --- a/riscv/simif.h +++ b/riscv/simif.h @@ -16,6 +16,9 @@ class simif_t virtual bool mmio_store(reg_t addr, size_t len, const uint8_t* bytes) = 0; // Callback for processors to let the simulation know they were reset. virtual void proc_reset(unsigned id) = 0; + + virtual const char* get_symbol(uint64_t addr) = 0; + }; #endif diff --git a/riscv/trap.h b/riscv/trap.h index b5b8a5080a..4431d8a94e 100644 --- a/riscv/trap.h +++ b/riscv/trap.h @@ -13,18 +13,23 @@ class trap_t public: trap_t(reg_t which) : which(which) {} virtual const char* name(); + virtual bool has_gva() { return false; } virtual bool has_tval() { return false; } virtual reg_t get_tval() { return 0; } + virtual bool has_tval2() { return false; } + virtual reg_t get_tval2() { return 0; } + virtual bool has_tinst() { return false; } + virtual reg_t get_tinst() { return 0; } reg_t cause() { return which; } private: char _name[16]; reg_t which; }; -class mem_trap_t : public trap_t +class insn_trap_t : public trap_t { public: - mem_trap_t(reg_t which, reg_t tval) + insn_trap_t(reg_t which, reg_t tval) : trap_t(which), tval(tval) {} bool has_tval() override { return true; } reg_t get_tval() override { return tval; } @@ -32,32 +37,59 @@ class mem_trap_t : public trap_t reg_t tval; }; +class mem_trap_t : public trap_t +{ + public: + mem_trap_t(reg_t which, bool gva, reg_t tval, reg_t tval2, reg_t tinst) + : trap_t(which), gva(gva), tval(tval), tval2(tval2), tinst(tinst) {} + bool has_gva() override { return gva; } + bool has_tval() override { return true; } + reg_t get_tval() override { return tval; } + bool has_tval2() override { return true; } + reg_t get_tval2() override { return tval2; } + bool 
has_tinst() override { return true; } + reg_t get_tinst() override { return tinst; } + private: + bool gva; + reg_t tval, tval2, tinst; +}; + #define DECLARE_TRAP(n, x) class trap_##x : public trap_t { \ public: \ trap_##x() : trap_t(n) {} \ const char* name() { return "trap_"#x; } \ }; +#define DECLARE_INST_TRAP(n, x) class trap_##x : public insn_trap_t { \ + public: \ + trap_##x(reg_t tval) : insn_trap_t(n, tval) {} \ + const char* name() { return "trap_"#x; } \ +}; + #define DECLARE_MEM_TRAP(n, x) class trap_##x : public mem_trap_t { \ public: \ - trap_##x(reg_t tval) : mem_trap_t(n, tval) {} \ + trap_##x(reg_t tval, reg_t tval2, reg_t tinst) : mem_trap_t(n, true, tval, tval2, tinst) {} \ const char* name() { return "trap_"#x; } \ }; DECLARE_MEM_TRAP(CAUSE_MISALIGNED_FETCH, instruction_address_misaligned) DECLARE_MEM_TRAP(CAUSE_FETCH_ACCESS, instruction_access_fault) -DECLARE_MEM_TRAP(CAUSE_ILLEGAL_INSTRUCTION, illegal_instruction) -DECLARE_MEM_TRAP(CAUSE_BREAKPOINT, breakpoint) +DECLARE_INST_TRAP(CAUSE_ILLEGAL_INSTRUCTION, illegal_instruction) +DECLARE_INST_TRAP(CAUSE_BREAKPOINT, breakpoint) DECLARE_MEM_TRAP(CAUSE_MISALIGNED_LOAD, load_address_misaligned) DECLARE_MEM_TRAP(CAUSE_MISALIGNED_STORE, store_address_misaligned) DECLARE_MEM_TRAP(CAUSE_LOAD_ACCESS, load_access_fault) DECLARE_MEM_TRAP(CAUSE_STORE_ACCESS, store_access_fault) DECLARE_TRAP(CAUSE_USER_ECALL, user_ecall) DECLARE_TRAP(CAUSE_SUPERVISOR_ECALL, supervisor_ecall) -DECLARE_TRAP(CAUSE_HYPERVISOR_ECALL, hypervisor_ecall) +DECLARE_TRAP(CAUSE_VIRTUAL_SUPERVISOR_ECALL, virtual_supervisor_ecall) DECLARE_TRAP(CAUSE_MACHINE_ECALL, machine_ecall) DECLARE_MEM_TRAP(CAUSE_FETCH_PAGE_FAULT, instruction_page_fault) DECLARE_MEM_TRAP(CAUSE_LOAD_PAGE_FAULT, load_page_fault) DECLARE_MEM_TRAP(CAUSE_STORE_PAGE_FAULT, store_page_fault) +DECLARE_MEM_TRAP(CAUSE_FETCH_GUEST_PAGE_FAULT, instruction_guest_page_fault) +DECLARE_MEM_TRAP(CAUSE_LOAD_GUEST_PAGE_FAULT, load_guest_page_fault) 
+DECLARE_INST_TRAP(CAUSE_VIRTUAL_INSTRUCTION, virtual_instruction) +DECLARE_MEM_TRAP(CAUSE_STORE_GUEST_PAGE_FAULT, store_guest_page_fault) #endif diff --git a/scripts/vcs-version.sh b/scripts/vcs-version.sh index 31fae86951..692c071ef7 100755 --- a/scripts/vcs-version.sh +++ b/scripts/vcs-version.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash #========================================================================= # vcs-version.sh [options] [src-dir] #========================================================================= diff --git a/softfloat/f16_classify.c b/softfloat/f16_classify.c new file mode 100755 index 0000000000..9402ff13e8 --- /dev/null +++ b/softfloat/f16_classify.c @@ -0,0 +1,36 @@ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + diff --git a/softfloat/f16_to_i16.c b/softfloat/f16_to_i16.c new file mode 100644 index 0000000000..b0fbb7cc75 --- /dev/null +++ b/softfloat/f16_to_i16.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point 
Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_i8.c b/softfloat/f16_to_i8.c new file mode 100644 index 0000000000..23638cc102 --- /dev/null +++ b/softfloat/f16_to_i8.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast8_t f16_to_i8( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromPosOverflow; + } else if (sig32 < INT8_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui16.c b/softfloat/f16_to_ui16.c new file mode 100644 index 0000000000..81c4f8d9e0 --- /dev/null +++ b/softfloat/f16_to_ui16.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui8.c b/softfloat/f16_to_ui8.c new file mode 100644 index 0000000000..96124e1275 --- /dev/null +++ b/softfloat/f16_to_ui8.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast8_t f16_to_ui8( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui8_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_i16.c b/softfloat/f32_to_i16.c new file mode 100644 index 0000000000..bde4c76c9a --- /dev/null +++ b/softfloat/f32_to_i16.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_ui16.c b/softfloat/f32_to_ui16.c new file mode 100644 index 0000000000..073492bfaa --- /dev/null +++ b/softfloat/f32_to_ui16.c @@ -0,0 +1,53 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} diff --git a/softfloat/fall_maxmin.c b/softfloat/fall_maxmin.c new file mode 100644 index 0000000000..32a9ade59e --- /dev/null +++ b/softfloat/fall_maxmin.c @@ -0,0 +1,81 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <assert.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/* Defines f<bits>_max(a, b); the macro arguments a and b only name the generated function's parameters. */
+#define COMPARE_MAX(a, b, bits) \
+float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \
+{ \
+    bool greater = f ## bits ## _lt_quiet(b, a) || \
+               (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \
+    /* both NaN -> default NaN; one NaN -> the other operand; else a if b < a, or if they compare equal and b's sign bit is set; else b */ \
+    if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
+        union ui ## bits ## _f ## bits  ui; \
+        ui.ui = defaultNaNF ## bits ## UI; \
+        return ui.f; \
+    } else { \
+        return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
+    } \
+}
+/* Defines f<bits>_min(a, b); the macro arguments a and b only name the generated function's parameters. */
+#define COMPARE_MIN(a, b, bits) \
+float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \
+{ \
+    bool less = f ## bits ## _lt_quiet(a, b) || \
+               (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \
+    /* both NaN -> default NaN; one NaN -> the other operand; else a if a < b, or if they compare equal and a's sign bit is set; else b */ \
+    if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
+        union ui ## bits ## _f ## bits  ui; \
+        ui.ui = defaultNaNF ## bits ## UI; \
+        return ui.f; \
+    } else { \
+        return less || isNaNF ## bits ## UI((b).v) ? a : b; \
+    } \
+}
+
+COMPARE_MAX(a, b, 16) /* no trailing ';': each invocation expands to a complete function definition */
+COMPARE_MAX(a, b, 32)
+COMPARE_MAX(a, b, 64)
+
+COMPARE_MIN(a, b, 16)
+COMPARE_MIN(a, b, 32)
+COMPARE_MIN(a, b, 64)
diff --git a/softfloat/fall_reciprocal.c b/softfloat/fall_reciprocal.c
new file mode 100644
index 0000000000..1c96458935
--- /dev/null
+++ b/softfloat/fall_reciprocal.c
@@ -0,0 +1,392 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <assert.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/* Return the len-bit field of val whose least significant bit sits at bit position pos. */
+static inline uint64_t extract64(uint64_t val, int pos, int len)
+{
+  assert(pos >= 0 && len > 0 && len <= 64 - pos);
+  return (val >> pos) & (~UINT64_C(0) >> (64 - len));
+}
+/* Return a mask of len consecutive one bits whose lowest bit is at position pos. */
+static inline uint64_t make_mask64(int pos, int len)
+{
+  assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
+  return (UINT64_MAX >> (64 - len)) << pos;
+}
+
+//user needs to truncate output to required length; 7-bit reciprocal square-root estimate on raw bits (e exponent bits, s significand bits, sub: input is subnormal)
+static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
+  uint64_t exp = extract64(val, s, e);
+  uint64_t sig = extract64(val, 0, s);
+  uint64_t sign = extract64(val, s + e, 1);
+  const int p = 7; /* bits of precision in each table entry */
+
+  static const uint8_t table[] = {
+      52, 51, 50, 48, 47, 46, 44, 43,
+      42, 41, 40, 39, 38, 36, 35, 34,
+      33, 32, 31, 30, 30, 29, 28, 27,
+      26, 25, 24, 23, 23, 22, 21, 20,
+      19, 19, 18, 17, 16, 16, 15, 14,
+      14, 13, 12, 12, 11, 10, 10, 9,
+      9, 8, 7, 7, 6, 6, 5, 4,
+      4, 3, 3, 2, 2, 1, 1, 0,
+      127, 125, 123, 121, 119, 118, 116, 114,
+      113, 111, 109, 108, 106, 105, 103, 102,
+      100, 99, 97, 96, 95, 93, 92, 91,
+      90, 88, 87, 86, 85, 84, 83, 82,
+      80, 79, 78, 77, 76, 75, 74, 73,
+      72, 71, 70, 70, 69, 68, 67, 66,
+      65, 64, 63, 63, 62, 61, 60, 59,
+      59, 58, 57, 56, 56, 55, 54, 53};
+
+  if (sub) { /* normalise; the loop only terminates when sig is nonzero */
+    while (extract64(sig, s - 1, 1) == 0)
+      exp--, sig <<= 1; /* exp is unsigned and may wrap below zero; the ~exp arithmetic below works modulo 2^64 */
+
+    sig = (sig << 1) & make_mask64(0, s); /* drop the now-implicit leading one */
+  }
+
+  int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); /* exponent parity picks the table half, the top p-1 significand bits the entry */
+  uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+  uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; /* (3 * bias - 1 - exp) / 2 with bias = 2^(e-1) - 1 */
+
+  return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+/* Half-precision reciprocal square-root estimate: special classes are answered here, the rest by rsqrte7(). */
+float16_t f16_rsqrte7(float16_t in)
+{
+  union ui16_f16 uA;
+
+  uA.f = in;
+  unsigned int ret = f16_classify(in);
+  bool sub = false;
+  switch(ret) {
+  case 0x001: // -inf
+  case 0x002: // -normal
+  case 0x004: // -subnormal
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF16UI;
+    break;
+  case 0x008: // -0
+    uA.ui = 0xfc00;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = 0x7c00;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +num
+    uA.ui = rsqrte7(uA.ui, 5, 10, sub);
+    break;
+  }
+
+  return uA.f;
+}
+/* Single-precision reciprocal square-root estimate: special classes are answered here, the rest by rsqrte7(). */
+float32_t f32_rsqrte7(float32_t in)
+{
+  union ui32_f32 uA;
+
+  uA.f = in;
+  unsigned int ret = f32_classify(in);
+  bool sub = false;
+  switch(ret) {
+  case 0x001: // -inf
+  case 0x002: // -normal
+  case 0x004: // -subnormal
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF32UI;
+    break;
+  case 0x008: // -0
+    uA.ui = 0xff800000;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = 0x7f800000;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +num
+    uA.ui = rsqrte7(uA.ui, 8, 23, sub);
+    break;
+  }
+
+  return uA.f;
+}
+/* Double-precision reciprocal square-root estimate: special classes are answered here, the rest by rsqrte7(). */
+float64_t f64_rsqrte7(float64_t in)
+{
+  union ui64_f64 uA;
+
+  uA.f = in;
+  unsigned int ret = f64_classify(in);
+  bool sub = false;
+  switch(ret) {
+  case 0x001: // -inf
+  case 0x002: // -normal
+  case 0x004: // -subnormal
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF64UI;
+    break;
+  case 0x008: // -0
+    uA.ui = UINT64_C(0xfff0000000000000);
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = UINT64_C(0x7ff0000000000000);
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +num
+    uA.ui = rsqrte7(uA.ui, 11, 52, sub);
+    break;
+  }
+
+  return uA.f;
+}
+
+//user needs to truncate output to required length; 7-bit reciprocal estimate on raw bits (e exponent bits, s significand bits, rm: rounding mode, sub: input is subnormal)
+static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
+                              bool *round_abnormal) /* *round_abnormal is set to true only on the out-of-range path below, never cleared */
+{
+  uint64_t exp = extract64(val, s, e);
+  uint64_t sig = extract64(val, 0, s);
+  uint64_t sign = extract64(val, s + e, 1);
+  const int p = 7; /* bits of precision in each table entry */
+
+  static const uint8_t table[] = {
+      127, 125, 123, 121, 119, 117, 116, 114,
+      112, 110, 109, 107, 105, 104, 102, 100,
+      99, 97, 96, 94, 93, 91, 90, 88,
+      87, 85, 84, 83, 81, 80, 79, 77,
+      76, 75, 74, 72, 71, 70, 69, 68,
+      66, 65, 64, 63, 62, 61, 60, 59,
+      58, 57, 56, 55, 54, 53, 52, 51,
+      50, 49, 48, 47, 46, 45, 44, 43,
+      42, 41, 40, 40, 39, 38, 37, 36,
+      35, 35, 34, 33, 32, 31, 31, 30,
+      29, 28, 28, 27, 26, 25, 25, 24,
+      23, 23, 22, 21, 21, 20, 19, 19,
+      18, 17, 17, 16, 15, 15, 14, 14,
+      13, 12, 12, 11, 11, 10, 9, 9,
+      8, 8, 7, 7, 6, 5, 5, 4,
+      4, 3, 3, 2, 2, 1, 1, 0};
+
+  if (sub) { /* normalise; the loop only terminates when sig is nonzero */
+    while (extract64(sig, s - 1, 1) == 0)
+      exp--, sig <<= 1; /* exp is unsigned and may wrap below zero */
+
+    sig = (sig << 1) & make_mask64(0, s); /* drop the now-implicit leading one */
+
+    if (exp != 0 && exp != UINT64_MAX) { /* more than one normalising shift was needed: the callers report this as overflow */
+      *round_abnormal = true;
+      if (rm == 1 || /* NOTE(review): 1, 2, 3 are presumably softfloat's minMag, min and max rounding modes -- confirm */
+          (rm == 2 && !sign) ||
+          (rm == 3 && sign))
+        return ((sign << (s+e)) | make_mask64(s, e)) - 1; /* infinity pattern minus one: largest finite magnitude */
+      else
+        return (sign << (s+e)) | make_mask64(s, e); /* signed infinity pattern */
+    }
+  }
+
+  int idx = sig >> (s-p); /* top p significand bits select the entry */
+  uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+  uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp; /* 2 * bias - 1 - exp with bias = 2^(e-1) - 1, modulo 2^64 */
+  if (out_exp == 0 || out_exp == UINT64_MAX) { /* result has exponent field 0: make the leading one explicit */
+    out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
+    if (out_exp == UINT64_MAX) { /* one binade lower still: shift once more */
+      out_sig >>= 1;
+      out_exp = 0;
+    }
+  }
+
+  return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+/* Half-precision reciprocal estimate: special classes are answered here, the rest by recip7() under softfloat_roundingMode. */
+float16_t f16_recip7(float16_t in)
+{
+  union ui16_f16 uA;
+
+  uA.f = in;
+  unsigned int ret = f16_classify(in);
+  bool sub = false;
+  bool round_abnormal = false;
+  switch(ret) {
+  case 0x001: // -inf
+    uA.ui = 0x8000;
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x008: // -0
+    uA.ui = 0xfc00;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = 0x7c00;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF16UI;
+    break;
+  case 0x004: // -subnormal
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +- normal
+    uA.ui = recip7(uA.ui, 5, 10,
+                   softfloat_roundingMode, sub, &round_abnormal);
+    if (round_abnormal)
+      softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                  softfloat_flag_overflow;
+    break;
+  }
+
+  return uA.f;
+}
+/* Single-precision reciprocal estimate: special classes are answered here, the rest by recip7() under softfloat_roundingMode. */
+float32_t f32_recip7(float32_t in)
+{
+  union ui32_f32 uA;
+
+  uA.f = in;
+  unsigned int ret = f32_classify(in);
+  bool sub = false;
+  bool round_abnormal = false;
+  switch(ret) {
+  case 0x001: // -inf
+    uA.ui = 0x80000000;
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x008: // -0
+    uA.ui = 0xff800000;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = 0x7f800000;
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF32UI;
+    break;
+  case 0x004: // -subnormal
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +- normal
+    uA.ui = recip7(uA.ui, 8, 23,
+                   softfloat_roundingMode, sub, &round_abnormal);
+    if (round_abnormal)
+      softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                  softfloat_flag_overflow;
+    break;
+  }
+
+  return uA.f;
+}
+/* Double-precision reciprocal estimate: special classes are answered here, the rest by recip7() under softfloat_roundingMode. */
+float64_t f64_recip7(float64_t in)
+{
+  union ui64_f64 uA;
+
+  uA.f = in;
+  unsigned int ret = f64_classify(in);
+  bool sub = false;
+  bool round_abnormal = false;
+  switch(ret) {
+  case 0x001: // -inf
+    uA.ui = UINT64_C(0x8000000000000000);
+    break;
+  case 0x080: //+inf
+    uA.ui = 0x0;
+    break;
+  case 0x008: // -0
+    uA.ui = UINT64_C(0xfff0000000000000);
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x010: // +0
+    uA.ui = UINT64_C(0x7ff0000000000000);
+    softfloat_exceptionFlags |= softfloat_flag_infinite;
+    break;
+  case 0x100: // sNaN
+    softfloat_exceptionFlags |= softfloat_flag_invalid; /* fall through */
+  case 0x200: //qNaN
+    uA.ui = defaultNaNF64UI;
+    break;
+  case 0x004: // -subnormal
+  case 0x020: //+ sub
+    sub = true; /* fall through */
+  default: // +- normal
+    uA.ui = recip7(uA.ui, 11, 52,
+                   softfloat_roundingMode, sub, &round_abnormal);
+    if (round_abnormal)
+      softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                  softfloat_flag_overflow;
+    break;
+  }
+
+  return uA.f;
+}
diff --git a/softfloat/platform.h b/softfloat/platform.h
index 03dd429faf..55de1941a7 100644
--- a/softfloat/platform.h
+++ b/softfloat/platform.h
@@ -36,11 +36,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ +#include "config.h" +#ifndef WORDS_BIGENDIAN #define LITTLEENDIAN 1 +#endif #define INLINE_LEVEL 5 #define SOFTFLOAT_FAST_INT64 #define SOFTFLOAT_FAST_DIV64TO32 +#define SOFTFLOAT_ROUND_ODD /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index b277281ec2..bdac1be263 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -141,8 +141,12 @@ void i64_to_f128M( int64_t, float128_t * ); /*---------------------------------------------------------------------------- | 16-bit (half-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast8_t f16_to_ui8( float16_t, uint_fast8_t, bool ); +uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool ); uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool ); +int_fast8_t f16_to_i8( float16_t, uint_fast8_t, bool ); +int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool ); int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool ); int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool ); @@ -160,6 +164,8 @@ void f16_to_f128M( float16_t, float128_t * ); float16_t f16_roundToInt( float16_t, uint_fast8_t, bool ); float16_t f16_add( float16_t, float16_t ); float16_t f16_sub( float16_t, float16_t ); +float16_t f16_max( float16_t, float16_t ); +float16_t f16_min( float16_t, float16_t ); float16_t f16_mul( float16_t, float16_t ); float16_t f16_mulAdd( float16_t, float16_t, float16_t ); float16_t f16_div( float16_t, float16_t ); @@ -172,12 +178,17 @@ bool f16_eq_signaling( float16_t, float16_t ); bool f16_le_quiet( 
float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); +uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); +float16_t f16_recip7( float16_t ); /*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool ); +int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool ); int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool ); int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); @@ -195,6 +206,8 @@ void f32_to_f128M( float32_t, float128_t * ); float32_t f32_roundToInt( float32_t, uint_fast8_t, bool ); float32_t f32_add( float32_t, float32_t ); float32_t f32_sub( float32_t, float32_t ); +float32_t f32_max( float32_t, float32_t ); +float32_t f32_min( float32_t, float32_t ); float32_t f32_mul( float32_t, float32_t ); float32_t f32_mulAdd( float32_t, float32_t, float32_t ); float32_t f32_div( float32_t, float32_t ); @@ -208,6 +221,8 @@ bool f32_le_quiet( float32_t, float32_t ); bool f32_lt_quiet( float32_t, float32_t ); bool f32_isSignalingNaN( float32_t ); uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); /*---------------------------------------------------------------------------- | 64-bit (double-precision) floating-point operations. 
@@ -231,6 +246,8 @@ void f64_to_f128M( float64_t, float128_t * ); float64_t f64_roundToInt( float64_t, uint_fast8_t, bool ); float64_t f64_add( float64_t, float64_t ); float64_t f64_sub( float64_t, float64_t ); +float64_t f64_max( float64_t, float64_t ); +float64_t f64_min( float64_t, float64_t ); float64_t f64_mul( float64_t, float64_t ); float64_t f64_mulAdd( float64_t, float64_t, float64_t ); float64_t f64_div( float64_t, float64_t ); @@ -244,6 +261,8 @@ bool f64_le_quiet( float64_t, float64_t ); bool f64_lt_quiet( float64_t, float64_t ); bool f64_isSignalingNaN( float64_t ); uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); /*---------------------------------------------------------------------------- | Rounding precision for 80-bit extended double-precision floating-point. diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index ff7637b13e..07dca1618a 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -38,6 +38,7 @@ softfloat_c_srcs = \ f128_to_ui64.c \ f128_to_ui64_r_minMag.c \ f16_add.c \ + f16_classify.c \ f16_div.c \ f16_eq.c \ f16_eq_signaling.c \ @@ -55,10 +56,14 @@ softfloat_c_srcs = \ f16_to_f128.c \ f16_to_f32.c \ f16_to_f64.c \ + f16_to_i8.c \ + f16_to_i16.c \ f16_to_i32.c \ f16_to_i32_r_minMag.c \ f16_to_i64.c \ f16_to_i64_r_minMag.c \ + f16_to_ui8.c \ + f16_to_ui16.c \ f16_to_ui32.c \ f16_to_ui32_r_minMag.c \ f16_to_ui64.c \ @@ -82,10 +87,12 @@ softfloat_c_srcs = \ f32_to_f128.c \ f32_to_f16.c \ f32_to_f64.c \ + f32_to_i16.c \ f32_to_i32.c \ f32_to_i32_r_minMag.c \ f32_to_i64.c \ f32_to_i64_r_minMag.c \ + f32_to_ui16.c \ f32_to_ui32.c \ f32_to_ui32_r_minMag.c \ f32_to_ui64.c \ @@ -117,6 +124,8 @@ softfloat_c_srcs = \ f64_to_ui32_r_minMag.c \ f64_to_ui64.c \ f64_to_ui64_r_minMag.c \ + fall_maxmin.c \ + fall_reciprocal.c \ i32_to_f128.c \ i32_to_f16.c \ i32_to_f32.c \ @@ -225,6 +234,10 @@ softfloat_c_srcs = \ ui64_to_f32.c \ ui64_to_f64.c \ 
+softfloat_CFLAGS = -fPIC + +softfloat_install_shared_lib = yes + softfloat_test_srcs = softfloat_install_prog_srcs = diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 629d5185b9..556476c1a5 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -55,6 +55,20 @@ extern "C" { | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0xFFFFFFFF diff --git a/spike_main/spike-dasm.cc b/spike_dasm/spike-dasm.cc similarity index 70% rename from spike_main/spike-dasm.cc rename to spike_dasm/spike-dasm.cc index 1161825c2e..fa6a25ae6a 100644 --- a/spike_main/spike-dasm.cc +++ b/spike_dasm/spike-dasm.cc @@ -21,13 +21,32 @@ int main(int argc, char** argv) std::function extension; option_parser_t parser; +#ifdef HAVE_DLOPEN parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); +#endif parser.option(0, "isa", 1, [&](const char* s){isa = s;}); parser.parse(argv); - processor_t p(isa, 0, 0); - if (extension) - p.register_extension(extension()); + std::string lowercase; + for (const char *p = isa; *p; p++) + lowercase += std::tolower(*p); + + int xlen; + if (lowercase.compare(0, 4, "rv32") == 0) { + xlen = 32; + } else if (lowercase.compare(0, 4, "rv64") == 0) { + xlen = 64; + } else { + fprintf(stderr, "bad ISA string: %s\n", isa); + return 1; + } + + disassembler_t* disassembler = new disassembler_t(xlen); + if 
(extension) { + for (auto disasm_insn : extension()->get_disasms()) { + disassembler->add_insn(disasm_insn); + } + } while (getline(cin, s)) { @@ -52,7 +71,7 @@ int main(int argc, char** argv) if (nbits < 64) bits = bits << (64 - nbits) >> (64 - nbits); - string dis = p.get_disassembler()->disassemble(bits); + string dis = disassembler->disassemble(bits); s = s.substr(0, start) + dis + s.substr(endp - &s[0] + 1); pos = start + dis.length(); } diff --git a/spike_dasm/spike_dasm.ac b/spike_dasm/spike_dasm.ac new file mode 100644 index 0000000000..e69de29bb2 diff --git a/spike_dasm/spike_dasm.mk.in b/spike_dasm/spike_dasm.mk.in new file mode 100644 index 0000000000..b6118fd5c6 --- /dev/null +++ b/spike_dasm/spike_dasm.mk.in @@ -0,0 +1,9 @@ +spike_dasm_subproject_deps = \ + disasm \ + $(if $(HAVE_DLOPEN),riscv,) \ + +spike_dasm_srcs = \ + spike_dasm_option_parser.cc \ + +spike_dasm_install_prog_srcs = \ + spike-dasm.cc \ diff --git a/spike_dasm/spike_dasm_option_parser.cc b/spike_dasm/spike_dasm_option_parser.cc new file mode 120000 index 0000000000..4244c15de1 --- /dev/null +++ b/spike_dasm/spike_dasm_option_parser.cc @@ -0,0 +1 @@ +../fesvr/option_parser.cc \ No newline at end of file diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc deleted file mode 100644 index 81264dd802..0000000000 --- a/spike_main/disasm.cc +++ /dev/null @@ -1,648 +0,0 @@ -// See LICENSE for license details. 
- -#include "disasm.h" -#include -#include -#include -#include -#include - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.i_imm()) + '(' + xpr_name[insn.rs1()] + ')'; - } -} load_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.s_imm()) + '(' + xpr_name[insn.rs1()] + ')'; - } -} store_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::string("(") + xpr_name[insn.rs1()] + ')'; - } -} amo_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rd()]; - } -} xrd; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rs1()]; - } -} xrs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rs2()]; - } -} xrs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rd()]; - } -} frd; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs1()]; - } -} frs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs2()]; - } -} frs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rs3()]; - } -} frs3; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - switch (insn.csr()) - { - #define DECLARE_CSR(name, num) case num: return #name; - #include "encoding.h" - #undef DECLARE_CSR - default: - { - char buf[16]; - snprintf(buf, sizeof buf, "unknown_%03" PRIx64, insn.csr()); - return std::string(buf); - } - } - } -} csr; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.i_imm()); - } -} imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.shamt()); - } -} shamt; - -struct : public arg_t { 
- std::string to_string(insn_t insn) const { - std::stringstream s; - s << std::hex << "0x" << ((uint32_t)insn.u_imm() >> 12); - return s.str(); - } -} bigimm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string(insn.rs1()); - } -} zimm5; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.sb_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} branch_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.uj_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << std::hex << " 0x" << abs(target); - return s.str(); - } -} jump_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs1()]; - } -} rvc_rs1; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs2()]; - } -} rvc_rs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rvc_rs2()]; - } -} rvc_fp_rs2; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs1s()]; - } -} rvc_rs1s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[insn.rvc_rs2s()]; - } -} rvc_rs2s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return fpr_name[insn.rvc_rs2s()]; - } -} rvc_fp_rs2s; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return xpr_name[X_SP]; - } -} rvc_sp; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_imm()); - } -} rvc_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_addi4spn_imm()); - } -} rvc_addi4spn_imm; - -struct : public arg_t { - std::string 
to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_addi16sp_imm()); - } -} rvc_addi16sp_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lwsp_imm()); - } -} rvc_lwsp_imm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)(insn.rvc_imm() & 0x3f)); - } -} rvc_shamt; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - s << std::hex << "0x" << ((uint32_t)insn.rvc_imm() << 12 >> 12); - return s.str(); - } -} rvc_uimm; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lwsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_lwsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_ldsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_ldsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_swsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_swsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_sdsp_imm()) + '(' + xpr_name[X_SP] + ')'; - } -} rvc_sdsp_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_lw_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; - } -} rvc_lw_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - return std::to_string((int)insn.rvc_ld_imm()) + '(' + xpr_name[insn.rvc_rs1s()] + ')'; - } -} rvc_ld_address; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.rvc_b_imm(); - char sign = target >= 0 ? 
'+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} rvc_branch_target; - -struct : public arg_t { - std::string to_string(insn_t insn) const { - std::stringstream s; - int32_t target = insn.rvc_j_imm(); - char sign = target >= 0 ? '+' : '-'; - s << "pc " << sign << ' ' << abs(target); - return s.str(); - } -} rvc_jump_target; - -std::string disassembler_t::disassemble(insn_t insn) const -{ - const disasm_insn_t* disasm_insn = lookup(insn); - return disasm_insn ? disasm_insn->to_string(insn) : "unknown"; -} - -disassembler_t::disassembler_t(int xlen) -{ - const uint32_t mask_rd = 0x1fUL << 7; - const uint32_t match_rd_ra = 1UL << 7; - const uint32_t mask_rs1 = 0x1fUL << 15; - const uint32_t match_rs1_ra = 1UL << 15; - const uint32_t mask_rs2 = 0x1fUL << 20; - const uint32_t mask_imm = 0xfffUL << 20; - const uint32_t match_imm_1 = 1UL << 20; - const uint32_t mask_rvc_rs2 = 0x1fUL << 2; - const uint32_t mask_rvc_imm = mask_rvc_rs2 | 0x1000UL; - - #define DECLARE_INSN(code, match, mask) \ - const uint32_t match_##code = match; \ - const uint32_t mask_##code = mask; - #include "encoding.h" - #undef DECLARE_INSN - - // explicit per-instruction disassembly - #define DISASM_INSN(name, code, extra, ...) 
\ - add_insn(new disasm_insn_t(name, match_##code, mask_##code | (extra), __VA_ARGS__)); - #define DEFINE_NOARG(code) \ - add_insn(new disasm_insn_t(#code, match_##code, mask_##code, {})); - #define DEFINE_RTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &xrs2}) - #define DEFINE_ITYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &imm}) - #define DEFINE_ITYPE_SHIFT(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs1, &shamt}) - #define DEFINE_I0TYPE(name, code) DISASM_INSN(name, code, mask_rs1, {&xrd, &imm}) - #define DEFINE_I1TYPE(name, code) DISASM_INSN(name, code, mask_imm, {&xrd, &xrs1}) - #define DEFINE_I2TYPE(name, code) DISASM_INSN(name, code, mask_rd | mask_imm, {&xrs1}) - #define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm}) - #define DEFINE_BTYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2, &branch_target}) - #define DEFINE_B0TYPE(name, code) DISASM_INSN(name, code, mask_rs1 | mask_rs2, {&branch_target}) - #define DEFINE_B1TYPE(name, code) DISASM_INSN(name, code, mask_rs2, {&xrs1, &branch_target}) - #define DEFINE_XLOAD(code) DISASM_INSN(#code, code, 0, {&xrd, &load_address}) - #define DEFINE_XSTORE(code) DISASM_INSN(#code, code, 0, {&xrs2, &store_address}) - #define DEFINE_XAMO(code) DISASM_INSN(#code, code, 0, {&xrd, &xrs2, &amo_address}) - #define DEFINE_XAMO_LR(code) DISASM_INSN(#code, code, 0, {&xrd, &amo_address}) - #define DEFINE_FLOAD(code) DISASM_INSN(#code, code, 0, {&frd, &load_address}) - #define DEFINE_FSTORE(code) DISASM_INSN(#code, code, 0, {&frs2, &store_address}) - #define DEFINE_FRTYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2}) - #define DEFINE_FR1TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1}) - #define DEFINE_FR3TYPE(code) DISASM_INSN(#code, code, 0, {&frd, &frs1, &frs2, &frs3}) - #define DEFINE_FXTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1}) - #define DEFINE_FX2TYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &frs1, &frs2}) - #define DEFINE_XFTYPE(code) DISASM_INSN(#code, code, 0, {&frd, 
&xrs1}) - #define DEFINE_SFENCE_TYPE(code) DISASM_INSN(#code, code, 0, {&xrs1, &xrs2}) - - DEFINE_XLOAD(lb) - DEFINE_XLOAD(lbu) - DEFINE_XLOAD(lh) - DEFINE_XLOAD(lhu) - DEFINE_XLOAD(lw) - DEFINE_XLOAD(lwu) - DEFINE_XLOAD(ld) - - DEFINE_XSTORE(sb) - DEFINE_XSTORE(sh) - DEFINE_XSTORE(sw) - DEFINE_XSTORE(sd) - - DEFINE_XAMO(amoadd_w) - DEFINE_XAMO(amoswap_w) - DEFINE_XAMO(amoand_w) - DEFINE_XAMO(amoor_w) - DEFINE_XAMO(amoxor_w) - DEFINE_XAMO(amomin_w) - DEFINE_XAMO(amomax_w) - DEFINE_XAMO(amominu_w) - DEFINE_XAMO(amomaxu_w) - DEFINE_XAMO(amoadd_d) - DEFINE_XAMO(amoswap_d) - DEFINE_XAMO(amoand_d) - DEFINE_XAMO(amoor_d) - DEFINE_XAMO(amoxor_d) - DEFINE_XAMO(amomin_d) - DEFINE_XAMO(amomax_d) - DEFINE_XAMO(amominu_d) - DEFINE_XAMO(amomaxu_d) - - DEFINE_XAMO_LR(lr_w) - DEFINE_XAMO(sc_w) - DEFINE_XAMO_LR(lr_d) - DEFINE_XAMO(sc_d) - - DEFINE_FLOAD(flw) - DEFINE_FLOAD(fld) - DEFINE_FLOAD(flq) - - DEFINE_FSTORE(fsw) - DEFINE_FSTORE(fsd) - DEFINE_FSTORE(fsq) - - add_insn(new disasm_insn_t("j", match_jal, mask_jal | mask_rd, {&jump_target})); - add_insn(new disasm_insn_t("jal", match_jal | match_rd_ra, mask_jal | mask_rd, {&jump_target})); - add_insn(new disasm_insn_t("jal", match_jal, mask_jal, {&xrd, &jump_target})); - - DEFINE_B1TYPE("beqz", beq); - DEFINE_B1TYPE("bnez", bne); - DEFINE_B1TYPE("bltz", blt); - DEFINE_B1TYPE("bgez", bge); - DEFINE_BTYPE(beq) - DEFINE_BTYPE(bne) - DEFINE_BTYPE(blt) - DEFINE_BTYPE(bge) - DEFINE_BTYPE(bltu) - DEFINE_BTYPE(bgeu) - - DEFINE_LTYPE(lui); - DEFINE_LTYPE(auipc); - - add_insn(new disasm_insn_t("ret", match_jalr | match_rs1_ra, mask_jalr | mask_rd | mask_rs1 | mask_imm, {})); - DEFINE_I2TYPE("jr", jalr); - add_insn(new disasm_insn_t("jalr", match_jalr | match_rd_ra, mask_jalr | mask_rd | mask_imm, {&xrs1})); - DEFINE_ITYPE(jalr); - - add_insn(new disasm_insn_t("nop", match_addi, mask_addi | mask_rd | mask_rs1 | mask_imm, {})); - add_insn(new disasm_insn_t(" - ", match_xor, mask_xor | mask_rd | mask_rs1 | mask_rs2, {})); // for 
machine-generated bubbles - DEFINE_I0TYPE("li", addi); - DEFINE_I1TYPE("mv", addi); - DEFINE_ITYPE(addi); - DEFINE_ITYPE(slti); - add_insn(new disasm_insn_t("seqz", match_sltiu | match_imm_1, mask_sltiu | mask_imm, {&xrd, &xrs1})); - DEFINE_ITYPE(sltiu); - add_insn(new disasm_insn_t("not", match_xori | mask_imm, mask_xori | mask_imm, {&xrd, &xrs1})); - DEFINE_ITYPE(xori); - - DEFINE_ITYPE_SHIFT(slli); - DEFINE_ITYPE_SHIFT(srli); - DEFINE_ITYPE_SHIFT(srai); - - DEFINE_ITYPE(ori); - DEFINE_ITYPE(andi); - DEFINE_I1TYPE("sext.w", addiw); - DEFINE_ITYPE(addiw); - - DEFINE_ITYPE_SHIFT(slliw); - DEFINE_ITYPE_SHIFT(srliw); - DEFINE_ITYPE_SHIFT(sraiw); - - DEFINE_RTYPE(add); - DEFINE_RTYPE(sub); - DEFINE_RTYPE(sll); - DEFINE_RTYPE(slt); - add_insn(new disasm_insn_t("snez", match_sltu, mask_sltu | mask_rs1, {&xrd, &xrs2})); - DEFINE_RTYPE(sltu); - DEFINE_RTYPE(xor); - DEFINE_RTYPE(srl); - DEFINE_RTYPE(sra); - DEFINE_RTYPE(or); - DEFINE_RTYPE(and); - DEFINE_RTYPE(mul); - DEFINE_RTYPE(mulh); - DEFINE_RTYPE(mulhu); - DEFINE_RTYPE(mulhsu); - DEFINE_RTYPE(div); - DEFINE_RTYPE(divu); - DEFINE_RTYPE(rem); - DEFINE_RTYPE(remu); - DEFINE_RTYPE(addw); - DEFINE_RTYPE(subw); - DEFINE_RTYPE(sllw); - DEFINE_RTYPE(srlw); - DEFINE_RTYPE(sraw); - DEFINE_RTYPE(mulw); - DEFINE_RTYPE(divw); - DEFINE_RTYPE(divuw); - DEFINE_RTYPE(remw); - DEFINE_RTYPE(remuw); - - DEFINE_NOARG(ecall); - DEFINE_NOARG(ebreak); - DEFINE_NOARG(uret); - DEFINE_NOARG(sret); - DEFINE_NOARG(mret); - DEFINE_NOARG(dret); - DEFINE_NOARG(wfi); - DEFINE_NOARG(fence); - DEFINE_NOARG(fence_i); - DEFINE_SFENCE_TYPE(sfence_vma); - - add_insn(new disasm_insn_t("csrr", match_csrrs, mask_csrrs | mask_rs1, {&xrd, &csr})); - add_insn(new disasm_insn_t("csrw", match_csrrw, mask_csrrw | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrs", match_csrrs, mask_csrrs | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrc", match_csrrc, mask_csrrc | mask_rd, {&csr, &xrs1})); - add_insn(new disasm_insn_t("csrwi", match_csrrwi, 
mask_csrrwi | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrsi", match_csrrsi, mask_csrrsi | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrci", match_csrrci, mask_csrrci | mask_rd, {&csr, &zimm5})); - add_insn(new disasm_insn_t("csrrw", match_csrrw, mask_csrrw, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrs", match_csrrs, mask_csrrs, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrc", match_csrrc, mask_csrrc, {&xrd, &csr, &xrs1})); - add_insn(new disasm_insn_t("csrrwi", match_csrrwi, mask_csrrwi, {&xrd, &csr, &zimm5})); - add_insn(new disasm_insn_t("csrrsi", match_csrrsi, mask_csrrsi, {&xrd, &csr, &zimm5})); - add_insn(new disasm_insn_t("csrrci", match_csrrci, mask_csrrci, {&xrd, &csr, &zimm5})); - - DEFINE_FRTYPE(fadd_s); - DEFINE_FRTYPE(fsub_s); - DEFINE_FRTYPE(fmul_s); - DEFINE_FRTYPE(fdiv_s); - DEFINE_FR1TYPE(fsqrt_s); - DEFINE_FRTYPE(fmin_s); - DEFINE_FRTYPE(fmax_s); - DEFINE_FR3TYPE(fmadd_s); - DEFINE_FR3TYPE(fmsub_s); - DEFINE_FR3TYPE(fnmadd_s); - DEFINE_FR3TYPE(fnmsub_s); - DEFINE_FRTYPE(fsgnj_s); - DEFINE_FRTYPE(fsgnjn_s); - DEFINE_FRTYPE(fsgnjx_s); - DEFINE_FR1TYPE(fcvt_s_d); - DEFINE_FR1TYPE(fcvt_s_q); - DEFINE_XFTYPE(fcvt_s_l); - DEFINE_XFTYPE(fcvt_s_lu); - DEFINE_XFTYPE(fcvt_s_w); - DEFINE_XFTYPE(fcvt_s_wu); - DEFINE_XFTYPE(fcvt_s_wu); - DEFINE_XFTYPE(fmv_w_x); - DEFINE_FXTYPE(fcvt_l_s); - DEFINE_FXTYPE(fcvt_lu_s); - DEFINE_FXTYPE(fcvt_w_s); - DEFINE_FXTYPE(fcvt_wu_s); - DEFINE_FXTYPE(fclass_s); - DEFINE_FXTYPE(fmv_x_w); - DEFINE_FX2TYPE(feq_s); - DEFINE_FX2TYPE(flt_s); - DEFINE_FX2TYPE(fle_s); - - DEFINE_FRTYPE(fadd_d); - DEFINE_FRTYPE(fsub_d); - DEFINE_FRTYPE(fmul_d); - DEFINE_FRTYPE(fdiv_d); - DEFINE_FR1TYPE(fsqrt_d); - DEFINE_FRTYPE(fmin_d); - DEFINE_FRTYPE(fmax_d); - DEFINE_FR3TYPE(fmadd_d); - DEFINE_FR3TYPE(fmsub_d); - DEFINE_FR3TYPE(fnmadd_d); - DEFINE_FR3TYPE(fnmsub_d); - DEFINE_FRTYPE(fsgnj_d); - DEFINE_FRTYPE(fsgnjn_d); - DEFINE_FRTYPE(fsgnjx_d); - DEFINE_FR1TYPE(fcvt_d_s); - 
DEFINE_FR1TYPE(fcvt_d_q); - DEFINE_XFTYPE(fcvt_d_l); - DEFINE_XFTYPE(fcvt_d_lu); - DEFINE_XFTYPE(fcvt_d_w); - DEFINE_XFTYPE(fcvt_d_wu); - DEFINE_XFTYPE(fcvt_d_wu); - DEFINE_XFTYPE(fmv_d_x); - DEFINE_FXTYPE(fcvt_l_d); - DEFINE_FXTYPE(fcvt_lu_d); - DEFINE_FXTYPE(fcvt_w_d); - DEFINE_FXTYPE(fcvt_wu_d); - DEFINE_FXTYPE(fclass_d); - DEFINE_FXTYPE(fmv_x_d); - DEFINE_FX2TYPE(feq_d); - DEFINE_FX2TYPE(flt_d); - DEFINE_FX2TYPE(fle_d); - - DEFINE_FRTYPE(fadd_q); - DEFINE_FRTYPE(fsub_q); - DEFINE_FRTYPE(fmul_q); - DEFINE_FRTYPE(fdiv_q); - DEFINE_FR1TYPE(fsqrt_q); - DEFINE_FRTYPE(fmin_q); - DEFINE_FRTYPE(fmax_q); - DEFINE_FR3TYPE(fmadd_q); - DEFINE_FR3TYPE(fmsub_q); - DEFINE_FR3TYPE(fnmadd_q); - DEFINE_FR3TYPE(fnmsub_q); - DEFINE_FRTYPE(fsgnj_q); - DEFINE_FRTYPE(fsgnjn_q); - DEFINE_FRTYPE(fsgnjx_q); - DEFINE_FR1TYPE(fcvt_q_s); - DEFINE_FR1TYPE(fcvt_q_d); - DEFINE_XFTYPE(fcvt_q_l); - DEFINE_XFTYPE(fcvt_q_lu); - DEFINE_XFTYPE(fcvt_q_w); - DEFINE_XFTYPE(fcvt_q_wu); - DEFINE_XFTYPE(fcvt_q_wu); - DEFINE_XFTYPE(fmv_q_x); - DEFINE_FXTYPE(fcvt_l_q); - DEFINE_FXTYPE(fcvt_lu_q); - DEFINE_FXTYPE(fcvt_w_q); - DEFINE_FXTYPE(fcvt_wu_q); - DEFINE_FXTYPE(fclass_q); - DEFINE_FXTYPE(fmv_x_q); - DEFINE_FX2TYPE(feq_q); - DEFINE_FX2TYPE(flt_q); - DEFINE_FX2TYPE(fle_q); - - DISASM_INSN("c.ebreak", c_add, mask_rd | mask_rvc_rs2, {}); - add_insn(new disasm_insn_t("ret", match_c_jr | match_rd_ra, mask_c_jr | mask_rd | mask_rvc_imm, {})); - DISASM_INSN("c.jr", c_jr, mask_rvc_imm, {&rvc_rs1}); - DISASM_INSN("c.jalr", c_jalr, mask_rvc_imm, {&rvc_rs1}); - DISASM_INSN("c.nop", c_addi, mask_rd | mask_rvc_imm, {}); - DISASM_INSN("c.addi16sp", c_addi16sp, mask_rd, {&rvc_sp, &rvc_addi16sp_imm}); - DISASM_INSN("c.addi4spn", c_addi4spn, 0, {&rvc_rs2s, &rvc_sp, &rvc_addi4spn_imm}); - DISASM_INSN("c.li", c_li, 0, {&xrd, &rvc_imm}); - DISASM_INSN("c.lui", c_lui, 0, {&xrd, &rvc_uimm}); - DISASM_INSN("c.addi", c_addi, 0, {&xrd, &rvc_imm}); - DISASM_INSN("c.slli", c_slli, 0, {&rvc_rs1, &rvc_shamt}); - 
DISASM_INSN("c.srli", c_srli, 0, {&rvc_rs1s, &rvc_shamt}); - DISASM_INSN("c.srai", c_srai, 0, {&rvc_rs1s, &rvc_shamt}); - DISASM_INSN("c.andi", c_andi, 0, {&rvc_rs1s, &rvc_imm}); - DISASM_INSN("c.mv", c_mv, 0, {&xrd, &rvc_rs2}); - DISASM_INSN("c.add", c_add, 0, {&xrd, &rvc_rs2}); - DISASM_INSN("c.addw", c_addw, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.sub", c_sub, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.subw", c_subw, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.and", c_and, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.or", c_or, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.xor", c_xor, 0, {&rvc_rs1s, &rvc_rs2s}); - DISASM_INSN("c.lwsp", c_lwsp, 0, {&xrd, &rvc_lwsp_address}); - DISASM_INSN("c.fld", c_fld, 0, {&rvc_fp_rs2s, &rvc_ld_address}); - DISASM_INSN("c.swsp", c_swsp, 0, {&rvc_rs2, &rvc_swsp_address}); - DISASM_INSN("c.lw", c_lw, 0, {&rvc_rs2s, &rvc_lw_address}); - DISASM_INSN("c.sw", c_sw, 0, {&rvc_rs2s, &rvc_lw_address}); - DISASM_INSN("c.beqz", c_beqz, 0, {&rvc_rs1s, &rvc_branch_target}); - DISASM_INSN("c.bnez", c_bnez, 0, {&rvc_rs1s, &rvc_branch_target}); - DISASM_INSN("c.j", c_j, 0, {&rvc_jump_target}); - DISASM_INSN("c.fldsp", c_fldsp, 0, {&rvc_fp_rs2s, &rvc_ldsp_address}); - DISASM_INSN("c.fsd", c_fsd, 0, {&rvc_fp_rs2s, &rvc_ld_address}); - DISASM_INSN("c.fsdsp", c_fsdsp, 0, {&rvc_fp_rs2s, &rvc_sdsp_address}); - - if (xlen == 32) { - DISASM_INSN("c.flw", c_flw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); - DISASM_INSN("c.flwsp", c_flwsp, 0, {&frd, &rvc_lwsp_address}); - DISASM_INSN("c.fsw", c_fsw, 0, {&rvc_fp_rs2s, &rvc_lw_address}); - DISASM_INSN("c.fswsp", c_fswsp, 0, {&rvc_fp_rs2, &rvc_swsp_address}); - DISASM_INSN("c.jal", c_jal, 0, {&rvc_jump_target}); - } else { - DISASM_INSN("c.ld", c_ld, 0, {&rvc_rs2s, &rvc_ld_address}); - DISASM_INSN("c.ldsp", c_ldsp, 0, {&xrd, &rvc_ldsp_address}); - DISASM_INSN("c.sd", c_sd, 0, {&rvc_rs2s, &rvc_ld_address}); - DISASM_INSN("c.sdsp", c_sdsp, 0, {&rvc_rs2, &rvc_sdsp_address}); - DISASM_INSN("c.addiw", c_addiw, 
0, {&xrd, &rvc_imm}); - } - - // provide a default disassembly for all instructions as a fallback - #define DECLARE_INSN(code, match, mask) \ - add_insn(new disasm_insn_t(#code " (args unknown)", match, mask, {})); - #include "encoding.h" - #undef DECLARE_INSN -} - -const disasm_insn_t* disassembler_t::lookup(insn_t insn) const -{ - size_t idx = insn.bits() % HASH_SIZE; - for (size_t j = 0; j < chain[idx].size(); j++) - if(*chain[idx][j] == insn) - return chain[idx][j]; - - idx = HASH_SIZE; - for (size_t j = 0; j < chain[idx].size(); j++) - if(*chain[idx][j] == insn) - return chain[idx][j]; - - return NULL; -} - -void disassembler_t::add_insn(disasm_insn_t* insn) -{ - size_t idx = HASH_SIZE; - if (insn->get_mask() % HASH_SIZE == HASH_SIZE - 1) - idx = insn->get_match() % HASH_SIZE; - chain[idx].push_back(insn); -} - -disassembler_t::~disassembler_t() -{ - for (size_t i = 0; i < HASH_SIZE+1; i++) - for (size_t j = 0; j < chain[i].size(); j++) - delete chain[i][j]; -} diff --git a/spike_main/spike-log-parser.cc b/spike_main/spike-log-parser.cc new file mode 100644 index 0000000000..d174afc6ba --- /dev/null +++ b/spike_main/spike-log-parser.cc @@ -0,0 +1,60 @@ +// See LICENSE for license details. + +// This little program finds occurrences of strings like +// core 0: 0x000000008000c36c (0xfe843783) ld a5, -24(s0) +// in its inputs, then output the RISC-V instruction with the disassembly +// enclosed hexadecimal number. 
+ +#include +#include +#include +#include +#include "fesvr/option_parser.h" + +#include "disasm.h" +#include "extension.h" + +using namespace std; + +int main(int argc, char** argv) +{ + string s; + const char* isa = DEFAULT_ISA; + + std::function extension; + option_parser_t parser; + parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); + parser.option(0, "isa", 1, [&](const char* s){isa = s;}); + parser.parse(argv); + + processor_t p(isa, DEFAULT_PRIV, DEFAULT_VARCH, 0, 0, false, nullptr); + if (extension) { + p.register_extension(extension()); + } + + std::regex reg("^core\\s+\\d+:\\s+0x[0-9a-f]+\\s+\\(0x([0-9a-f]+)\\)", std::regex_constants::icase); + std::smatch m; + std::ssub_match sm ; + + while (getline(cin,s)){ + if (regex_search(s, m, reg)){ + // the opcode string + string op = m[1].str(); + uint32_t bit_num = op.size() * 4; + uint64_t opcode = strtoull(op.c_str(), nullptr, 16); + + if (bit_num<64){ + opcode = opcode << (64-bit_num) >> (64-bit_num); + } + + const disasm_insn_t* disasm = p.get_disassembler()->lookup(opcode); + if (disasm) { + cout << disasm->get_name() << '\n'; + } else { + cout << "unknown_op\n"; + } + } + } + + return 0; +} diff --git a/spike_main/spike.cc b/spike_main/spike.cc index 3e5c7e6c60..89bf915103 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -12,9 +12,12 @@ #include #include #include +#include +#include "../VERSION" -static void help() +static void help(int exit_code = 1) { + fprintf(stderr, "Spike RISC-V ISA Simulator " SPIKE_VERSION "\n\n"); fprintf(stderr, "usage: spike [host options] [target options]\n"); fprintf(stderr, "Host Options:\n"); fprintf(stderr, " -p Simulate processors [default 1]\n"); @@ -24,27 +27,113 @@ static void help() fprintf(stderr, " -d Interactive debug mode\n"); fprintf(stderr, " -g Track histogram of PCs\n"); fprintf(stderr, " -l Generate a log of execution\n"); - fprintf(stderr, " -h Print this help message\n"); + fprintf(stderr, " -h, --help Print 
this help message\n"); fprintf(stderr, " -H Start halted, allowing a debugger to connect\n"); fprintf(stderr, " --isa= RISC-V ISA string [default %s]\n", DEFAULT_ISA); + fprintf(stderr, " --priv= RISC-V privilege modes supported [default %s]\n", DEFAULT_PRIV); + fprintf(stderr, " --varch= RISC-V Vector uArch string [default %s]\n", DEFAULT_VARCH); fprintf(stderr, " --pc=
Override ELF entry point\n"); fprintf(stderr, " --hartids= Explicitly specify hartids, default is 0,1,...\n"); fprintf(stderr, " --ic=:: Instantiate a cache model with S sets,\n"); fprintf(stderr, " --dc=:: W ways, and B-byte blocks (with S and\n"); fprintf(stderr, " --l2=:: B both powers of 2).\n"); + fprintf(stderr, " --device= Attach MMIO plugin device from an --extlib library\n"); + fprintf(stderr, " P -- Name of the MMIO plugin\n"); + fprintf(stderr, " B -- Base memory address of the device\n"); + fprintf(stderr, " A -- String arguments to pass to the plugin\n"); + fprintf(stderr, " This flag can be used multiple times.\n"); + fprintf(stderr, " The extlib flag for the library must come first.\n"); fprintf(stderr, " --log-cache-miss Generate a log of cache miss\n"); fprintf(stderr, " --extension= Specify RoCC Extension\n"); fprintf(stderr, " --extlib= Shared library to load\n"); + fprintf(stderr, " This flag can be used multiple times.\n"); fprintf(stderr, " --rbb-port= Listen on for remote bitbang connection\n"); fprintf(stderr, " --dump-dts Print device tree string and exit\n"); fprintf(stderr, " --disable-dtb Don't write the device tree blob into memory\n"); - fprintf(stderr, " --progsize= Progsize for the debug module [default 2]\n"); - fprintf(stderr, " --debug-sba= Debug bus master supports up to " + fprintf(stderr, " --kernel= Load kernel flat image into memory\n"); + fprintf(stderr, " --initrd= Load kernel initrd into memory\n"); + fprintf(stderr, " --bootargs= Provide custom bootargs for kernel [default: console=hvc0 earlycon=sbi]\n"); + fprintf(stderr, " --real-time-clint Increment clint time at real-time rate\n"); + fprintf(stderr, " --dm-progsize= Progsize for the debug module [default 2]\n"); + fprintf(stderr, " --dm-sba= Debug bus master supports up to " " wide accesses [default 0]\n"); - fprintf(stderr, " --debug-auth Debug module requires debugger to authenticate\n"); + fprintf(stderr, " --dm-auth Debug module requires debugger to 
authenticate\n"); + fprintf(stderr, " --dmi-rti= Number of Run-Test/Idle cycles " + "required for a DMI access [default 0]\n"); + fprintf(stderr, " --dm-abstract-rti= Number of Run-Test/Idle cycles " + "required for an abstract command to execute [default 0]\n"); + fprintf(stderr, " --dm-no-hasel Debug module supports hasel\n"); + fprintf(stderr, " --dm-no-abstract-csr Debug module won't support abstract to authenticate\n"); + fprintf(stderr, " --dm-no-halt-groups Debug module won't support halt groups\n"); + fprintf(stderr, " --dm-no-impebreak Debug module won't support implicit ebreak in program buffer\n"); + + exit(exit_code); +} + +static void suggest_help() +{ + fprintf(stderr, "Try 'spike --help' for more information.\n"); exit(1); } +static bool check_file_exists(const char *fileName) +{ + std::ifstream infile(fileName); + return infile.good(); +} + +static std::ifstream::pos_type get_file_size(const char *filename) +{ + std::ifstream in(filename, std::ios::ate | std::ios::binary); + return in.tellg(); +} + +static void read_file_bytes(const char *filename,size_t fileoff, + char *read_buf, size_t read_sz) +{ + std::ifstream in(filename, std::ios::in | std::ios::binary); + in.seekg(fileoff, std::ios::beg); + in.read(read_buf, read_sz); +} + +bool sort_mem_region(const std::pair &a, + const std::pair &b) +{ + if (a.first == b.first) + return (a.second->size() < b.second->size()); + else + return (a.first < b.first); +} + +void merge_overlapping_memory_regions(std::vector>& mems) +{ + // check the user specified memory regions and merge the overlapping or + // eliminate the containing parts + std::sort(mems.begin(), mems.end(), sort_mem_region); + reg_t start_page = 0, end_page = 0; + std::vector>::reverse_iterator it = mems.rbegin(); + std::vector>::reverse_iterator _it = mems.rbegin(); + for(; it != mems.rend(); ++it) { + reg_t _start_page = it->first/PGSIZE; + reg_t _end_page = _start_page + it->second->size()/PGSIZE; + if (_start_page >= start_page && 
_end_page <= end_page) { + // contains + mems.erase(std::next(it).base()); + }else if ( _start_page < start_page && _end_page > start_page) { + // overlapping + _it->first = _start_page; + if (_end_page > end_page) + end_page = _end_page; + mems.erase(std::next(it).base()); + }else { + _it = it; + start_page = _start_page; + end_page = _end_page; + assert(start_page < end_page); + } + } +} + static std::vector> make_mems(const char* arg) { // handle legacy mem argument @@ -64,8 +153,23 @@ static std::vector> make_mems(const char* arg) if (!*p || *p != ':') help(); auto size = strtoull(p + 1, &p, 0); - if ((size | base) % PGSIZE != 0) + + // page-align base and size + auto base0 = base, size0 = size; + size += base0 % PGSIZE; + base -= base0 % PGSIZE; + if (size % PGSIZE != 0) + size += PGSIZE - size % PGSIZE; + + if (base + size < base) help(); + + if (size != size0) { + fprintf(stderr, "Warning: the memory at [0x%llX, 0x%llX] has been realigned\n" + "to the %ld KiB page size: [0x%llX, 0x%llX]\n", + base0, base0 + size0 - 1, PGSIZE / 1024, base, base + size - 1); + } + res.push_back(std::make_pair(reg_t(base), new mem_t(size))); if (!*p) break; @@ -73,6 +177,8 @@ static std::vector> make_mems(const char* arg) help(); arg = p + 1; } + + merge_overlapping_memory_regions(res); return res; } @@ -84,20 +190,41 @@ int main(int argc, char** argv) bool log = false; bool dump_dts = false; bool dtb_enabled = true; + bool real_time_clint = false; size_t nprocs = 1; + const char* kernel = NULL; + reg_t kernel_offset, kernel_size; + size_t initrd_size; + reg_t initrd_start = 0, initrd_end = 0; + const char* bootargs = NULL; reg_t start_pc = reg_t(-1); std::vector> mems; + std::vector> plugin_devices; std::unique_ptr ic; std::unique_ptr dc; std::unique_ptr l2; bool log_cache = false; + bool log_commits = false; + const char *log_path = nullptr; std::function extension; + const char* initrd = NULL; const char* isa = DEFAULT_ISA; + const char* priv = DEFAULT_PRIV; + const char* 
varch = DEFAULT_VARCH; + const char* dtb_file = NULL; uint16_t rbb_port = 0; bool use_rbb = false; - unsigned progsize = 2; - unsigned max_bus_master_bits = 0; - bool require_authentication = false; + unsigned dmi_rti = 0; + debug_module_config_t dm_config = { + .progbufsize = 2, + .max_bus_master_bits = 0, + .require_authentication = false, + .abstract_rti = 0, + .support_hasel = true, + .support_abstract_csr_access = true, + .support_haltgroups = true, + .support_impebreak = true + }; std::vector hartids; auto const hartids_parser = [&](const char *s) { @@ -112,9 +239,52 @@ int main(int argc, char** argv) } }; + auto const device_parser = [&plugin_devices](const char *s) { + const std::string str(s); + std::istringstream stream(str); + + // We are parsing a string like name,base,args. + + // Parse the name, which is simply all of the characters leading up to the + // first comma. The validity of the plugin name will be checked later. + std::string name; + std::getline(stream, name, ','); + if (name.empty()) { + throw std::runtime_error("Plugin name is empty."); + } + + // Parse the base address. First, get all of the characters up to the next + // comma (or up to the end of the string if there is no comma). Then try to + // parse that string as an integer according to the rules of strtoull. It + // could be in decimal, hex, or octal. Fail if we were able to parse a + // number but there were garbage characters after the valid number. We must + // consume the entire string between the commas. + std::string base_str; + std::getline(stream, base_str, ','); + if (base_str.empty()) { + throw std::runtime_error("Device base address is empty."); + } + char* end; + reg_t base = static_cast(strtoull(base_str.c_str(), &end, 0)); + if (end != &*base_str.cend()) { + throw std::runtime_error("Error parsing device base address."); + } + + // The remainder of the string is the arguments. We could use getline, but + // that could ignore newline characters in the arguments. 
That should be + // rare and discouraged, but handle it here anyway with this weird in_avail + // technique. The arguments are optional, so if there were no arguments + // specified we could end up with an empty string here. That's okay. + auto avail = stream.rdbuf()->in_avail(); + std::string args(avail, '\0'); + stream.readsome(&args[0], avail); + + plugin_devices.emplace_back(base, new mmio_plugin_device_t(name, args)); + }; + option_parser_t parser; - parser.help(&help); - parser.option('h', 0, 0, [&](const char* s){help();}); + parser.help(&suggest_help); + parser.option('h', "help", 0, [&](const char* s){help(0);}); parser.option('d', 0, 0, [&](const char* s){debug = true;}); parser.option('g', 0, 0, [&](const char* s){histogram = true;}); parser.option('l', 0, 0, [&](const char* s){log = true;}); @@ -130,9 +300,17 @@ int main(int argc, char** argv) parser.option(0, "l2", 1, [&](const char* s){l2.reset(cache_sim_t::construct(s, "L2$"));}); parser.option(0, "log-cache-miss", 0, [&](const char* s){log_cache = true;}); parser.option(0, "isa", 1, [&](const char* s){isa = s;}); + parser.option(0, "priv", 1, [&](const char* s){priv = s;}); + parser.option(0, "varch", 1, [&](const char* s){varch = s;}); + parser.option(0, "device", 1, device_parser); parser.option(0, "extension", 1, [&](const char* s){extension = find_extension(s);}); parser.option(0, "dump-dts", 0, [&](const char *s){dump_dts = true;}); parser.option(0, "disable-dtb", 0, [&](const char *s){dtb_enabled = false;}); + parser.option(0, "dtb", 1, [&](const char *s){dtb_file = s;}); + parser.option(0, "kernel", 1, [&](const char* s){kernel = s;}); + parser.option(0, "initrd", 1, [&](const char* s){initrd = s;}); + parser.option(0, "bootargs", 1, [&](const char* s){bootargs = s;}); + parser.option(0, "real-time-clint", 0, [&](const char *s){real_time_clint = true;}); parser.option(0, "extlib", 1, [&](const char *s){ void *lib = dlopen(s, RTLD_NOW | RTLD_GLOBAL); if (lib == NULL) { @@ -140,11 +318,28 @@ 
int main(int argc, char** argv) exit(-1); } }); - parser.option(0, "progsize", 1, [&](const char* s){progsize = atoi(s);}); - parser.option(0, "debug-sba", 1, - [&](const char* s){max_bus_master_bits = atoi(s);}); - parser.option(0, "debug-auth", 0, - [&](const char* s){require_authentication = true;}); + parser.option(0, "dm-progsize", 1, + [&](const char* s){dm_config.progbufsize = atoi(s);}); + parser.option(0, "dm-no-impebreak", 0, + [&](const char* s){dm_config.support_impebreak = false;}); + parser.option(0, "dm-sba", 1, + [&](const char* s){dm_config.max_bus_master_bits = atoi(s);}); + parser.option(0, "dm-auth", 0, + [&](const char* s){dm_config.require_authentication = true;}); + parser.option(0, "dmi-rti", 1, + [&](const char* s){dmi_rti = atoi(s);}); + parser.option(0, "dm-abstract-rti", 1, + [&](const char* s){dm_config.abstract_rti = atoi(s);}); + parser.option(0, "dm-no-hasel", 0, + [&](const char* s){dm_config.support_hasel = false;}); + parser.option(0, "dm-no-abstract-csr", 0, + [&](const char* s){dm_config.support_abstract_csr_access = false;}); + parser.option(0, "dm-no-halt-groups", 0, + [&](const char* s){dm_config.support_haltgroups = false;}); + parser.option(0, "log-commits", 0, + [&](const char* s){log_commits = true;}); + parser.option(0, "log", 1, + [&](const char* s){log_path = s;}); auto argv1 = parser.parse(argv); std::vector htif_args(argv1, (const char*const*)argv + argc); @@ -154,15 +349,42 @@ int main(int argc, char** argv) if (!*argv1) help(); - sim_t s(isa, nprocs, halted, start_pc, mems, htif_args, std::move(hartids), - progsize, max_bus_master_bits, require_authentication); + if (kernel && check_file_exists(kernel)) { + kernel_size = get_file_size(kernel); + if (isa[2] == '6' && isa[3] == '4') + kernel_offset = 0x200000; + else + kernel_offset = 0x400000; + for (auto& m : mems) { + if (kernel_size && (kernel_offset + kernel_size) < m.second->size()) { + read_file_bytes(kernel, 0, m.second->contents() + kernel_offset, 
kernel_size); + break; + } + } + } + + if (initrd && check_file_exists(initrd)) { + initrd_size = get_file_size(initrd); + for (auto& m : mems) { + if (initrd_size && (initrd_size + 0x1000) < m.second->size()) { + initrd_end = m.first + m.second->size() - 0x1000; + initrd_start = initrd_end - initrd_size; + read_file_bytes(initrd, 0, m.second->contents() + (initrd_start - m.first), initrd_size); + break; + } + } + } + + sim_t s(isa, priv, varch, nprocs, halted, real_time_clint, + initrd_start, initrd_end, bootargs, start_pc, mems, plugin_devices, htif_args, + std::move(hartids), dm_config, log_path, dtb_enabled, dtb_file); std::unique_ptr remote_bitbang((remote_bitbang_t *) NULL); - std::unique_ptr jtag_dtm(new jtag_dtm_t(&s.debug_module)); + std::unique_ptr jtag_dtm( + new jtag_dtm_t(&s.debug_module, dmi_rti)); if (use_rbb) { remote_bitbang.reset(new remote_bitbang_t(rbb_port, &(*jtag_dtm))); s.set_remote_bitbang(&(*remote_bitbang)); } - s.set_dtb_enabled(dtb_enabled); if (dump_dts) { printf("%s", s.get_dts()); @@ -181,7 +403,16 @@ int main(int argc, char** argv) } s.set_debug(debug); - s.set_log(log); + s.configure_log(log, log_commits); s.set_histogram(histogram); - return s.run(); + + auto return_code = s.run(); + + for (auto& mem : mems) + delete mem.second; + + for (auto& plugin_device : plugin_devices) + delete plugin_device.second; + + return return_code; } diff --git a/spike_main/spike_main.mk.in b/spike_main/spike_main.mk.in index 500446fa6c..35bef398c4 100644 --- a/spike_main/spike_main.mk.in +++ b/spike_main/spike_main.mk.in @@ -1,10 +1,13 @@ spike_main_subproject_deps = \ + fdt \ + fesvr \ softfloat \ + disasm \ riscv \ spike_main_install_prog_srcs = \ spike.cc \ - spike-dasm.cc \ + spike-log-parser.cc \ xspike.cc \ termios-xspike.cc \