From 588578e447085fef89957b33e38c9aa308c50aa4 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Fri, 23 Dec 2022 13:51:21 +0500 Subject: [PATCH] [riscv-tests] Updated tests for single lane --- .../isa/rv64uv/1_lane_tests/Makefrag | 40 + .../isa/rv64uv/1_lane_tests/vaadd.c | 59 ++ .../isa/rv64uv/1_lane_tests/vaaddu.c | 59 ++ .../isa/rv64uv/1_lane_tests/vadc.c | 103 +++ .../isa/rv64uv/1_lane_tests/vadd.c | 203 +++++ .../isa/rv64uv/1_lane_tests/vand.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vasub.c | 54 ++ .../isa/rv64uv/1_lane_tests/vasubu.c | 54 ++ .../isa/rv64uv/1_lane_tests/vcompress.c | 26 + .../isa/rv64uv/1_lane_tests/vcpop.c | 47 + .../isa/rv64uv/1_lane_tests/vdiv.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vdivu.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vfadd.c | 449 ++++++++++ .../isa/rv64uv/1_lane_tests/vfclass.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfcvt.c | 834 ++++++++++++++++++ .../isa/rv64uv/1_lane_tests/vfdiv.c | 355 ++++++++ .../isa/rv64uv/1_lane_tests/vfirst.c | 48 + .../isa/rv64uv/1_lane_tests/vfmacc.c | 356 ++++++++ .../isa/rv64uv/1_lane_tests/vfmadd.c | 433 +++++++++ .../isa/rv64uv/1_lane_tests/vfmax.c | 351 ++++++++ .../isa/rv64uv/1_lane_tests/vfmerge.c | 94 ++ .../isa/rv64uv/1_lane_tests/vfmin.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfmsac.c | 454 ++++++++++ .../isa/rv64uv/1_lane_tests/vfmsub.c | 453 ++++++++++ .../isa/rv64uv/1_lane_tests/vfmul.c | 350 ++++++++ .../isa/rv64uv/1_lane_tests/vfmv.c | 68 ++ .../isa/rv64uv/1_lane_tests/vfmvfs.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfmvsf.c | 69 ++ .../isa/rv64uv/1_lane_tests/vfncvt.c | 793 +++++++++++++++++ .../isa/rv64uv/1_lane_tests/vfnmacc.c | 456 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmadd.c | 458 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmsac.c | 455 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmsub.c | 454 ++++++++++ .../isa/rv64uv/1_lane_tests/vfrdiv.c | 179 ++++ .../isa/rv64uv/1_lane_tests/vfredmax.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfredmin.c | 350 ++++++++ 
.../isa/rv64uv/1_lane_tests/vfredosum.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfredusum.c | 352 ++++++++ .../isa/rv64uv/1_lane_tests/vfrsub.c | 167 ++++ .../isa/rv64uv/1_lane_tests/vfsgnj.c | 408 +++++++++ .../isa/rv64uv/1_lane_tests/vfsgnjn.c | 350 ++++++++ .../isa/rv64uv/1_lane_tests/vfsgnjx.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfslide1down.c | 101 +++ .../isa/rv64uv/1_lane_tests/vfslide1up.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfsqrt.c | 142 +++ .../isa/rv64uv/1_lane_tests/vfsub.c | 349 ++++++++ .../isa/rv64uv/1_lane_tests/vfwadd.c | 531 +++++++++++ .../isa/rv64uv/1_lane_tests/vfwcvt.c | 670 ++++++++++++++ .../isa/rv64uv/1_lane_tests/vfwmacc.c | 351 ++++++++ .../isa/rv64uv/1_lane_tests/vfwmsac.c | 353 ++++++++ .../isa/rv64uv/1_lane_tests/vfwmul.c | 258 ++++++ .../isa/rv64uv/1_lane_tests/vfwnmacc.c | 352 ++++++++ .../isa/rv64uv/1_lane_tests/vfwnmsac.c | 347 ++++++++ .../isa/rv64uv/1_lane_tests/vfwredosum.c | 268 ++++++ .../isa/rv64uv/1_lane_tests/vfwredusum.c | 272 ++++++ .../isa/rv64uv/1_lane_tests/vfwsub.c | 527 +++++++++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vid.c | 31 + .../isa/rv64uv/1_lane_tests/viota.c | 37 + apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c | 79 ++ .../isa/rv64uv/1_lane_tests/vl1r.c | 439 +++++++++ .../isa/rv64uv/1_lane_tests/vl_nocheck.c | 68 ++ .../isa/rv64uv/1_lane_tests/vle1.c | 45 + .../isa/rv64uv/1_lane_tests/vle16.c | 293 ++++++ .../isa/rv64uv/1_lane_tests/vle32.c | 307 +++++++ .../isa/rv64uv/1_lane_tests/vle64.c | 315 +++++++ .../isa/rv64uv/1_lane_tests/vle8.c | 273 ++++++ .../isa/rv64uv/1_lane_tests/vlff.c | 91 ++ .../riscv-tests/isa/rv64uv/1_lane_tests/vls.c | 190 ++++ .../isa/rv64uv/1_lane_tests/vluxei.c | 167 ++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vlx.c | 101 +++ .../isa/rv64uv/1_lane_tests/vmacc.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vmadc.c | 224 +++++ .../isa/rv64uv/1_lane_tests/vmadd.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vmand.c | 79 ++ .../isa/rv64uv/1_lane_tests/vmandnot.c | 68 ++ 
.../isa/rv64uv/1_lane_tests/vmax.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vmaxu.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vmerge.c | 113 +++ .../isa/rv64uv/1_lane_tests/vmfeq.c | 503 +++++++++++ .../isa/rv64uv/1_lane_tests/vmfge.c | 134 +++ .../isa/rv64uv/1_lane_tests/vmfgt.c | 134 +++ .../isa/rv64uv/1_lane_tests/vmfle.c | 273 ++++++ .../isa/rv64uv/1_lane_tests/vmflt.c | 279 ++++++ .../isa/rv64uv/1_lane_tests/vmfne.c | 503 +++++++++++ .../isa/rv64uv/1_lane_tests/vmin.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vminu.c | 176 ++++ .../isa/rv64uv/1_lane_tests/vmnand.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmnor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmornot.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmsbc.c | 160 ++++ .../isa/rv64uv/1_lane_tests/vmsbf.c | 33 + .../isa/rv64uv/1_lane_tests/vmseq.c | 306 +++++++ .../isa/rv64uv/1_lane_tests/vmsgt.c | 168 ++++ .../isa/rv64uv/1_lane_tests/vmsgtu.c | 168 ++++ .../isa/rv64uv/1_lane_tests/vmsif.c | 33 + .../isa/rv64uv/1_lane_tests/vmsle.c | 237 +++++ .../isa/rv64uv/1_lane_tests/vmsleu.c | 237 +++++ .../isa/rv64uv/1_lane_tests/vmslt.c | 163 ++++ .../isa/rv64uv/1_lane_tests/vmsltu.c | 163 ++++ .../isa/rv64uv/1_lane_tests/vmsne.c | 306 +++++++ .../isa/rv64uv/1_lane_tests/vmsof.c | 33 + .../isa/rv64uv/1_lane_tests/vmul.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vmulh.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vmulhsu.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vmulhu.c | 232 +++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vmv.c | 104 +++ .../isa/rv64uv/1_lane_tests/vmvnrr.c | 146 +++ .../isa/rv64uv/1_lane_tests/vmvsx.c | 75 ++ .../isa/rv64uv/1_lane_tests/vmvxs.c | 72 ++ .../isa/rv64uv/1_lane_tests/vmxnor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmxor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vnclip.c | 78 ++ .../isa/rv64uv/1_lane_tests/vnclipu.c | 78 ++ .../isa/rv64uv/1_lane_tests/vnmsac.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vnmsub.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vnsra.c | 242 +++++ 
.../isa/rv64uv/1_lane_tests/vnsrl.c | 242 +++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vor.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vpopc_m.c | 30 + .../isa/rv64uv/1_lane_tests/vredand.c | 93 ++ .../isa/rv64uv/1_lane_tests/vredmax.c | 79 ++ .../isa/rv64uv/1_lane_tests/vredmaxu.c | 106 +++ .../isa/rv64uv/1_lane_tests/vredmin.c | 78 ++ .../isa/rv64uv/1_lane_tests/vredminu.c | 78 ++ .../isa/rv64uv/1_lane_tests/vredor.c | 93 ++ .../isa/rv64uv/1_lane_tests/vredsum.c | 178 ++++ .../isa/rv64uv/1_lane_tests/vredxor.c | 44 + .../isa/rv64uv/1_lane_tests/vrem.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vremu.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vrgather.c | 72 ++ .../isa/rv64uv/1_lane_tests/vrsub.c | 136 +++ apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c | 93 ++ .../isa/rv64uv/1_lane_tests/vs1r.c | 162 ++++ .../isa/rv64uv/1_lane_tests/vsadd.c | 99 +++ .../isa/rv64uv/1_lane_tests/vsaddu.c | 113 +++ .../isa/rv64uv/1_lane_tests/vsbc.c | 76 ++ .../isa/rv64uv/1_lane_tests/vse1.c | 57 ++ .../isa/rv64uv/1_lane_tests/vse16.c | 357 ++++++++ .../isa/rv64uv/1_lane_tests/vse32.c | 408 +++++++++ .../isa/rv64uv/1_lane_tests/vse64.c | 435 +++++++++ .../isa/rv64uv/1_lane_tests/vse8.c | 331 +++++++ .../isa/rv64uv/1_lane_tests/vsetivli.c | 466 ++++++++++ .../isa/rv64uv/1_lane_tests/vsetvl.c | 526 +++++++++++ .../isa/rv64uv/1_lane_tests/vsetvli.c | 528 +++++++++++ .../isa/rv64uv/1_lane_tests/vsext.c | 106 +++ .../isa/rv64uv/1_lane_tests/vslide1down.c | 101 +++ .../isa/rv64uv/1_lane_tests/vslide1up.c | 78 ++ .../isa/rv64uv/1_lane_tests/vslidedown.c | 164 ++++ .../isa/rv64uv/1_lane_tests/vslideup.c | 166 ++++ .../isa/rv64uv/1_lane_tests/vsll.c | 316 +++++++ .../isa/rv64uv/1_lane_tests/vsmul.c | 59 ++ .../isa/rv64uv/1_lane_tests/vsra.c | 316 +++++++ .../isa/rv64uv/1_lane_tests/vsrl.c | 316 +++++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vss.c | 146 +++ .../isa/rv64uv/1_lane_tests/vssra.c | 79 ++ .../isa/rv64uv/1_lane_tests/vssrl.c | 79 ++ .../isa/rv64uv/1_lane_tests/vssub.c | 55 ++ 
.../isa/rv64uv/1_lane_tests/vssubu.c | 55 ++ .../isa/rv64uv/1_lane_tests/vsub.c | 136 +++ .../isa/rv64uv/1_lane_tests/vsux.c | 104 +++ .../isa/rv64uv/1_lane_tests/vsuxei.c | 137 +++ .../riscv-tests/isa/rv64uv/1_lane_tests/vsx.c | 102 +++ .../isa/rv64uv/1_lane_tests/vwadd.c | 241 +++++ .../isa/rv64uv/1_lane_tests/vwaddu.c | 244 +++++ .../isa/rv64uv/1_lane_tests/vwmacc.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccsu.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccu.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccus.c | 127 +++ .../isa/rv64uv/1_lane_tests/vwmul.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwmulsu.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwmulu.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwredsum.c | 153 ++++ .../isa/rv64uv/1_lane_tests/vwredsumu.c | 153 ++++ .../isa/rv64uv/1_lane_tests/vwsub.c | 246 ++++++ .../isa/rv64uv/1_lane_tests/vwsubu.c | 246 ++++++ .../isa/rv64uv/1_lane_tests/vxor.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vzext.c | 106 +++ 178 files changed, 38409 insertions(+) create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c 
create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c 
create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag new file mode 100644 index 000000000..caba44c02 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag @@ -0,0 +1,40 @@ +#Copyright 2021 ETH Zurich and University of Bologna. +#Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+#SPDX-License-Identifier: SHL-0.51 +# +#Author: Matheus Cavalcante <matheusd@iis.ee.ethz.ch> +#Basile Bougenot <bbougenot@student.ethz.ch> + +rv64uv_sc_tests = vaadd vaaddu vsadd vsaddu vsmul vssra vssrl vnclip vnclipu vadd + vsub vrsub vwaddu vwsubu vwadd vwsub vsext vzext vadc vmadc vsbc vmsbc vand vor + vxor vsll vsrl vsra vnsrl vnsra vmseq vmsne vmsltu vmslt vmsleu vmsle vmsgtu + vmsgt vminu vmin vmaxu vmax vmul vmulh vmulhu vmulhsu vdivu vdiv vremu + vrem vwmul vwmulu vwmulsu vmacc vnmsac vmadd vnmsub vwmaccu vwmacc + vwmaccsu vwmaccus vmerge vmv vmvxs vmvsx vfmvfs vfmvsf vmvnrr + vredsum vredmaxu vredmax vredminu vredmin vredand vredor + vredxor vwredsumu vwredsum vfadd vfsub vfrsub vfwadd + vfwsub vfmul vfdiv vfrdiv vfwmul vfmacc vfnmacc vfmsac + vfnmsac vfmadd vfnmadd vfmsub vfnmsub vfwmacc + vfwnmacc vfwmsac vfwnmsac vfsqrt vfmin vfmax vfredusum vfredosum vfredmin vfredmax + vfwredusum vfwredosum vfclass vfsgnj vfsgnjn vfsgnjx vfmerge + vfmv vmfeq vmfne vmflt vmfle vmfgt vmfge vfcvt vfwcvt vfncvt + vmand vmnand vmandnot vmor vmnor vmornot vmxor vmxnor vslideup vslidedown + vslide1up vfslide1up vslide1down vfslide1down vl + vl1r vle1 vls vluxei vs + vs1r vse1 vss vsuxei vsetivli vsetvli + vsetvl vmsbf vmsof vmsif viota vid vcpop vfirst vle8 + vse8 vle16 vse16 vle32 vse32 vle64 vse64 + +#rv64uv_sc_tests = vaadd vaaddu vadc vasub vasubu vcompress vfirst vid viota \ + vl vlff vl_nocheck vlx vmsbf vmsif vmsof vpopc_m vrgather vsadd vsaddu \ + vsetvl vsetivli vsetvli vsmul vssra vssrl vssub vssubu vsux vsx + + rv64uv_p_tests = $( + addprefix + rv64uv - + p - + , + $(rv64uv_sc_tests)) + + spike_ctests += + $(rv64uv_p_tests) diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c new file mode 100644 index 000000000..513b4f4f3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c @@ -0,0 +1,59 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + set_vxrm(0); // setting vxrm to rnu rounding mode + VSET(4, e8, m1); + VLOAD_8(v1, 1, -2, -3, 4); + VLOAD_8(v2, 1, 2, -3, 3); + __asm__ volatile("vaadd.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 1, 0, -3, 4); +} + +void TEST_CASE2(void) { + set_vxrm(1); // setting vxrm to rne rounding mode + VSET(4, e8, m1); + VLOAD_8(v1, 1, -2, -3, 4); + VLOAD_8(v2, 1, 9, -3, 5); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaadd.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 4); +} + +void TEST_CASE3(void) { + set_vxrm(2); // setting vxrm to rdn rounding mode + VSET(4, e32, m1); + VLOAD_32(v1, 1, -2, 3, -4); + const uint32_t scalar = 5; + __asm__ volatile("vaadd.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v3, 3, 1, 4, 0); +} + +// Don't use VCLEAR here: it results in a glitch where all values are off by 1. NOTE(review): VCLEAR(v3) is still called in this test - confirm. +void TEST_CASE4(void) { + set_vxrm(3); // setting vxrm to rod rounding mode + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaadd.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(4, v3, 0, 3, 0, 5); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c new file mode 100644 index 000000000..ff50b894c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c @@ -0,0 +1,59 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + set_vxrm(0); // setting vxrm to rnu rounding mode + VSET(4, e8, m1); + VLOAD_8(v1, 1, 2, 3, 5); + VLOAD_8(v2, 1, 3, 8, 4); + __asm__ volatile("vaaddu.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 1, 3, 6, 5); +} + +void TEST_CASE2(void) { + set_vxrm(1); // setting vxrm to rne rounding mode + VSET(4, e8, m1); + VLOAD_8(v1, 5, 8, 3, 7); + VLOAD_8(v2, 7, 5, 3, 5); + VLOAD_8(v0, 0x0A, 0x00, 0x00, 0x00); + VCLEAR(v3); + __asm__ volatile("vaaddu.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 6, 0, 6); +} + +void TEST_CASE3(void) { + set_vxrm(2); // setting vxrm to rdn rounding mode + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vaaddu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v3, 3, 3, 4, 4); +} + +// Don't use VCLEAR here: it results in a glitch where all values are off by 1. NOTE(review): VCLEAR(v3) is still called in this test - confirm. +void TEST_CASE4(void) { + set_vxrm(3); // setting vxrm to rod rounding mode + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaaddu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(4, v3, 0, 3, 0, 5); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c new file mode 100644 index 000000000..ed24c2d45 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c @@ -0,0 +1,103 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v3, v1, v2, v0"); + VCMP_U8(1, v3, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v6, v2, v4, v0"); + VCMP_U16(2, v6, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v12, v4, v8, v0"); + VCMP_U32(3, v12, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v24, v8, v16, v0"); + VCMP_U64(4, v24, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); +}; + +void TEST_CASE2(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VCMP_U8(5, v3, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v4, v2, %[A], v0" ::[A] "r"(scalar)); + VCMP_U16(6, v4, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 
0xAA, 0xAA); + asm volatile("vadc.vxm v8, v4, %[A], v0" ::[A] "r"(scalar)); + VCMP_U32(7, v8, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v16, v8, %[A], v0" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v3, v1, 5, v0"); + VCMP_U8(9, v3, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v4, v2, 5, v0"); + VCMP_U16(10, v4, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v8, v4, 5, v0"); + VCMP_U32(11, v8, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v16, v8, 5, v0"); + VCMP_U64(12, v16, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c new file mode 100644 index 000000000..a83b07b3a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c @@ -0,0 +1,203 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // unmasked vadd.vv at SEW 8/16/32/64, vl = 16
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vv v3, v1, v2");
+  VCMP_U8(1, v3, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vv v6, v2, v4");
+  VCMP_U16(2, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vv v12, v4, v8");
+  VCMP_U32(3, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vv v24, v8, v16");
+  VCMP_U64(4, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16);
+}
+
+void TEST_CASE2(void) {  // masked vadd.vv: only odd elements are written
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);  // mask bits 0b10101010: odd elements active
+  VCLEAR(v3);  // inactive elements are expected to read back as 0 below
+  asm volatile("vadd.vv v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v6);
+  asm volatile("vadd.vv v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v12);
+  asm volatile("vadd.vv v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v24);
+  asm volatile("vadd.vv v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16);
+}
+
+void TEST_CASE3(void) {  // unmasked vadd.vi with immediate 5
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vi v3, v1, 5");
+  VCMP_U8(9, v3, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vi v4, v2, 5");
+  VCMP_U16(10, v4, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vi v8, v4, 5");
+  VCMP_U32(11, v8, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  asm volatile("vadd.vi v16, v8, 5");
+  VCMP_U64(12, v16, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13);
+}
+
+void TEST_CASE4(void) {  // masked vadd.vi: odd elements get src + 5
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);  // odd elements active
+  VCLEAR(v3);
+  asm volatile("vadd.vi v3, v1, 5, v0.t");
+  VCMP_U8(13, v3, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v4);
+  asm volatile("vadd.vi v4, v2, 5, v0.t");
+  VCMP_U16(14, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v8);
+  asm volatile("vadd.vi v8, v4, 5, v0.t");
+  VCMP_U32(15, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v16);
+  asm volatile("vadd.vi v16, v8, 5, v0.t");
+  VCMP_U64(16, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+}
+
+void TEST_CASE5(void) {  // unmasked vadd.vx with mixed-sign sources
+  const uint32_t scalar = 5;
+
+  // NOTE(review): negative literals are passed to the VCMP_U* macros below;
+  // presumably they wrap to the unsigned element width -- confirm in the macro.
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16);
+  asm volatile("vadd.vx v3, v1, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(17, v3, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16);
+  asm volatile("vadd.vx v4, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(18, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16);
+  asm volatile("vadd.vx v8, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(19, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16);
+  asm volatile("vadd.vx v16, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(20, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20,
+           -11);
+}
+
+void TEST_CASE6(void) {  // masked vadd.vx: odd elements get src + scalar
+  const uint32_t scalar = 5;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);  // odd elements active
+  VCLEAR(v3);
+  asm volatile("vadd.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(21, v3, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v4);
+  asm volatile("vadd.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(22, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v8);
+  asm volatile("vadd.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v16);
+  asm volatile("vadd.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(24, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13);
+}
+
+// Check that the addition also works when source register EEWs are changed
+void TEST_CASE7(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VSET(8, e16, m1);  // same 16 bytes, now viewed as eight 16-bit elements
+  asm volatile("vadd.vv v3, v1, v2");  // byte sums are <= 16: no carry-over
+  VSET(16, e8, m1);  // compare the result byte by byte again
+  VCMP_U8(25, v3, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+  TEST_CASE6();
+  TEST_CASE7();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c
new file mode 100644
index 000000000..d2dd25e0e
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c
@@ -0,0 +1,309 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // unmasked vand.vv at SEW 8/16/32/64, vl = 12
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03,
+          0xf0);
+  asm volatile("vand.vv v1, v2, v3");
+  VCMP_U8(1, v1, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0,
+          0x01, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003,
+           0xf0f0, 0xff00, 0x0003, 0xf0f0);
+  asm volatile("vand.vv v2, v4, v6");
+  VCMP_U16(2, v2, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0, 0xff00,
+           0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003,
+           0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000,
+           0x00000003, 0xf0f0f0f0);
+  asm volatile("vand.vv v4, v8, v12");
+  VCMP_U32(3, v4, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001,
+           0xf0f0f0f0, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000,
+           0x00000001, 0xf0f0f0f0);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0);
+  asm volatile("vand.vv v8, v16, v24");
+  VCMP_U64(4, v8, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+}
+
+void TEST_CASE2(void) {  // masked vand.vv: inactive elements keep the filler
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03,
+          0xf0);
+  VLOAD_8(v0, 0x6D, 0x0B);  // mask: elements 1, 4, 7 and 10 are inactive
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef);
+  asm volatile("vand.vv v1, v2, v3, v0.t");
+  VCMP_U8(5, v1, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0,
+          0xef, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003,
+           0xf0f0, 0xff00, 0x0003, 0xf0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vand.vv v2, v4, v6, v0.t");
+  VCMP_U16(6, v2, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0, 0xff00,
+           0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003,
+           0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000,
+           0x00000003, 0xf0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef);
+  asm volatile("vand.vv v4, v8, v12, v0.t");
+  VCMP_U32(7, v4, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef,
+           0xf0f0f0f0, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000,
+           0xdeadbeef, 0xf0f0f0f0);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef);
+  asm volatile("vand.vv v8, v16, v24, v0.t");
+  VCMP_U64(8, v8, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0,
+           0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0);
+}
+
+void TEST_CASE3(void) {  // unmasked vand.vx; expected values use scalar's low SEW bits
+  const uint64_t scalar = 0x0ff00ff00ff00ff0;
+
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  asm volatile("vand.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(9, v1, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0,
+          0x00, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(10, v2, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0, 0x0ff0,
+           0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  asm volatile("vand.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(11, v4, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000,
+           0x00f000f0, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0,
+           0x00000000, 0x00f000f0);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  asm volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(12, v8, 0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0);
+}
+
+void TEST_CASE4(void) {  // masked vand.vx: inactive elements keep the filler
+  const uint64_t scalar = 0x0ff00ff00ff00ff0;
+
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  VLOAD_8(v0, 0x6D, 0x0B);  // mask: elements 1, 4, 7 and 10 are inactive
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef);
+  asm volatile("vand.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(13, v1, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0,
+          0xef, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(14, v2, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0,
+           0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef);
+  asm volatile("vand.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(15, v4, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef,
+           0x00f000f0, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0,
+           0xdeadbeef, 0x00f000f0);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef);
+  asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(16, v8, 0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0,
+           0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0);
+}
+
+void TEST_CASE5(void) {  // unmasked vand.vi with immediate 15 (low nibble)
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  asm volatile("vand.vi v1, v2, 15");
+  VCMP_U8(17, v1, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f,
+          0x01, 0x00);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  asm volatile("vand.vi v2, v4, 15");
+  VCMP_U16(18, v2, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000, 0x000f,
+           0x0001, 0x0000, 0x000f, 0x0001, 0x0000);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  asm volatile("vand.vi v4, v8, 15");
+  VCMP_U32(19, v4, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001,
+           0x00000000, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f,
+           0x00000001, 0x00000000);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  asm volatile("vand.vi v8, v16, 15");
+  VCMP_U64(20, v8, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000,
+           0x000000000000000f, 0x0000000000000001, 0x0000000000000000,
+           0x000000000000000f, 0x0000000000000001, 0x0000000000000000,
+           0x000000000000000f, 0x0000000000000001, 0x0000000000000000);
+}
+
+void TEST_CASE6(void) {  // masked vand.vi: inactive elements keep the filler
+  VSET(12, e8, m1);
+  VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01,
+          0xf0);
+  VLOAD_8(v0, 0x6D, 0x0B);  // mask: elements 1, 4, 7 and 10 are inactive
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef);
+  asm volatile("vand.vi v1, v2, 15, v0.t");
+  VCMP_U8(21, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f,
+          0xef, 0x00);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001,
+           0xf0f0, 0xffff, 0x0001, 0xf0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vand.vi v2, v4, 15, v0.t");
+  VCMP_U16(22, v2, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000, 0x000f,
+           0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001,
+           0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff,
+           0x00000001, 0xf0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef);
+  asm volatile("vand.vi v4, v8, 15, v0.t");
+  VCMP_U32(23, v4, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, 0xdeadbeef,
+           0x00000000, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f,
+           0xdeadbeef, 0x00000000);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_8(v0, 0x6D, 0x0B);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef);
+  asm volatile("vand.vi v8, v16, 15, v0.t");
+  VCMP_U64(24, v8, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000,
+           0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000,
+           0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000,
+           0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+  TEST_CASE6();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c
new file mode 100644
index 000000000..760a33369
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c
@@ -0,0 +1,54 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // unmasked vasub.vv: (v1 - v2) >> 1, four e32 lanes
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 5, 10, 15, 20);
+  VLOAD_32(v2, -1, 2, -3, 4);
+  __asm__ volatile("vasub.vv v3, v1, v2" ::);
+  VEC_CMP_32(1, v3, 3, 4, 9, 8);  // NOTE(review): siblings use VCMP_*/VCLEAR
+}
+
+void TEST_CASE2(void) {  // masked vasub.vv
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 5, 10, 15, 20);
+  VLOAD_32(v2, 1, 2, 3, -4);
+  VLOAD_32(v0, 10, 0, 0, 0);  // mask 0b1010: only elements 1 and 3 active
+  CLEAR(v3);  // inactive elements are expected to read back as 0 below
+  __asm__ volatile("vasub.vv v3, v1, v2, v0.t" ::);
+  VEC_CMP_32(2, v3, 0, 4, 0, 12);
+}
+
+void TEST_CASE3(void) {  // unmasked vasub.vx with scalar -5
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  const uint64_t scalar = -5;  // wraps to 2^64 - 5; low 32 bits are -5 at e32
+  __asm__ volatile("vasub.vx v3, v1, %[A]" ::[A] "r"(scalar));
+  // 7.5 -> 8 and 12.5 -> 13 only hold for vxrm = rnu, which is not set here.
+  VEC_CMP_U32(3, v3, 5, 8, 10, 13);
+}
+
+void TEST_CASE4(void) {  // masked vasub.vx with scalar -5
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  const uint64_t scalar = -5;
+  VLOAD_U32(v0, 10, 0, 0, 0);  // mask 0b1010: only elements 1 and 3 active
+  CLEAR(v3);
+  __asm__ volatile("vasub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar));
+  VEC_CMP_U32(4, v3, 0, 8, 0, 13);  // same rnu rounding assumption as case 3
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c
new file mode 100644
index 000000000..68ec9cdbf
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c
@@ -0,0 +1,55 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // unmasked vasubu.vv: (v1 - v2) >> 1, four e32 lanes
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  VLOAD_U32(v2, 1, 2, 3, 4);
+  __asm__ volatile("vasubu.vv v3, v1, v2" ::);
+  VEC_CMP_U32(1, v3, 2, 4, 6, 8);  // NOTE(review): siblings use VCMP_*/VCLEAR
+}
+
+void TEST_CASE2(void) {  // masked vasubu.vv
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  VLOAD_U32(v2, 1, 2, 3, 4);
+  VLOAD_U32(v0, 10, 0, 0, 0);  // mask 0b1010: only elements 1 and 3 active
+  CLEAR(v3);  // inactive elements are expected to read back as 0 below
+  __asm__ volatile("vasubu.vv v3, v1, v2, v0.t" ::);
+  VEC_CMP_U32(2, v3, 0, 4, 0, 8);
+}
+
+void TEST_CASE3(void) {  // unmasked vasubu.vx with scalar 5
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  const uint64_t scalar = 5;
+  __asm__ volatile("vasubu.vx v3, v1, %[A]" ::[A] "r"(scalar));
+  VEC_CMP_U32(3, v3, 0, 3, 5, 8);  // 2.5 -> 3, 7.5 -> 8: assumes vxrm = rnu
+}
+
+void TEST_CASE4(void) {  // masked vasubu.vx with scalar 5
+  VSET(4, e32, m1);
+  VLOAD_U32(v1, 5, 10, 15, 20);
+  const uint64_t scalar = 5;
+  VLOAD_U32(v0, 10, 0, 0, 0);  // mask 0b1010: only elements 1 and 3 active
+  CLEAR(v3);
+  __asm__ volatile("vasubu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar));
+  VEC_CMP_U32(4, v3, 0, 3, 0, 8);  // same rnu rounding assumption as case 3
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c
new file mode 100644
index 000000000..58917263e
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c
@@ -0,0 +1,26 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vcompress.vm packs the v0-selected elements of v4
+  VSET(4, e64, m1);
+  VLOAD_64(v4, 1, 2, 3, 4);
+  VLOAD_64(v0, 12, 0, 0, 0);  // mask 0b1100: selects elements 2 and 3
+  CLEAR(v2);  // NOTE(review): sibling tests use VCLEAR/VCMP_* -- confirm macro
+  __asm__ volatile("vcompress.vm v2, v4, v0");
+  DEBUG_64(v2);
+  VEC_CMP_64(1, v2, 3, 4, 0, 0);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+  TEST_CASE1();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c
new file mode 100644
index 000000000..a9b828e31
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c
@@ -0,0 +1,52 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+// masked
+void TEST_CASE1(void) {
+  VSET(4, e32, m1);
+  VCLEAR(v2);
+  VLOAD_32(v2, 7, 0, 0, 0);  // source mask: bits 0, 1 and 2 set
+  VLOAD_32(v0, 5, 0, 0, 0);  // v0.t enables elements 0 and 2 => count is 2
+  volatile uint32_t scalar = 1337;
+  volatile uint32_t OUP[] = {0, 0, 0, 0};
+  // %[A] is written by vpopc.m, so it must be an early-clobber output; the
+  // "memory" clobber tells the compiler that the asm stores into OUP.
+  __asm__ volatile(
+      "vpopc.m %[A], v2, v0.t \n"
+      "sw %[A], (%[P]) \n"
+      : [A] "=&r"(scalar)
+      : [P] "r"(OUP)
+      : "memory");
+  XCMP(1, OUP[0], 2);
+}
+
+// unmasked
+void TEST_CASE2(void) {
+  VSET(4, e32, m1);
+  VLOAD_32(v2, 0xF, 0, 0, 0);  // four source mask bits set => count is 4
+  volatile uint32_t scalar = 1337;
+  volatile uint32_t OUP[] = {0, 0, 0, 0};
+  // NOTE(review): RVV 1.0 spells this vcpop.m; vpopc.m kept for the toolchain.
+  __asm__ volatile(
+      "vpopc.m %[A], v2 \n"
+      "sw %[A], (%[P]) \n"
+      : [A] "=&r"(scalar)
+      : [P] "r"(OUP)
+      : "memory");
+  XCMP(2, OUP[0], 4);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+  TEST_CASE1();
+  TEST_CASE2();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c
new file mode 100644
index 000000000..ca92520a4
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c
@@ -0,0 +1,232 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // unmasked signed vdiv.vv at SEW 8/16/32/64, vl = 16
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e,
+          0x4f, 0xe7, 0x3d, 0x63, 0xd8);
+  VLOAD_8(v3, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16,
+          0xa0, 0xca, 0x83, 0x54, 0x90);
+  asm volatile("vdiv.vv v1, v2, v3");  // e.g. 0xcd / 0x11 = -51 / 17 = -3
+  VCMP_I8(1, v1, 0xfd, 0x00, 0x00, 0x01, 0x3d, 0x02, 0x00, 0x0b, 0x01, 0x00,
+          0x02, 0x00, 0x00, 0x00, 0x01, 0x00);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7,
+           0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075);
+  VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f,
+           0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892);
+  asm volatile("vdiv.vv v2, v4, v6");
+  VCMP_I16(2, v2, 0xffff, 0x0000, 0x0002, 0x0000, 0x0000, 0x0002, 0x0001,
+           0x0003, 0xffff, 0xffff, 0x0000, 0x0001, 0xfffc, 0xfffa, 0x0000,
+           0xfffc);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2,
+           0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2,
+           0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1,
+           0x0ceff9c0);
+  VLOAD_32(v12, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b,
+           0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3,
+           0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d,
+           0xffffffba);
+  asm volatile("vdiv.vv v4, v8, v12");
+  VCMP_I32(3, v4, 0xff95db40, 0xfdaec51b, 0xfedd4f4f, 0xfef2bd11, 0xfeef4ea4,
+           0xff872740, 0x00d6a792, 0xff3d81e7, 0xfb369eec, 0x00aa3ed6,
+           0x01168b3a, 0xfeb7d87b, 0xff0eb2ab, 0x01061804, 0xff24374a,
+           0xffd0afa2);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac,
+           0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6,
+           0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb,
+           0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b,
+           0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42,
+           0x63a8eaa417e6d29c);
+  VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e,
+           0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c,
+           0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc,
+           0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791,
+           0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027,
+           0x000000175355cab3);
+  asm volatile("vdiv.vv v8, v16, v24");
+  VCMP_I64(4, v8, 0xfffffffffec9dd87, 0xffffffffffe7dac0, 0x0000000000b9cad9,
+           0x00000000014f3850, 0x000000002c9a4382, 0x0000000000fa75a9,
+           0xffffffffffe51146, 0x0000000001baa14f, 0x000000000160270e,
+           0xfffffffffdad470e, 0xfffffffffddfe832, 0x00000000015c87ee,
+           0x0000000001511bae, 0xffffffffff3a4e84, 0x0000000001694c75,
+           0x000000000445c6cf);
+}
+
+void TEST_CASE2(void) {  // masked vdiv.vv: only odd elements are written
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e,
+          0x4f, 0xe7, 0x3d, 0x63, 0xd8);
+  VLOAD_8(v3, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16,
+          0xa0, 0xca, 0x83, 0x54, 0x90);
+  VLOAD_8(v0, 0xAA, 0xAA);  // mask bits 0b10101010: odd elements active
+  VCLEAR(v1);  // inactive elements are expected to read back as 0 below
+  asm volatile("vdiv.vv v1, v2, v3, v0.t");
+  VCMP_I8(5, v1, 0, 0x00, 0, 0x01, 0, 0x02, 0, 0x0b, 0, 0x00, 0, 0x00, 0, 0x00,
+          0, 0x00);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7,
+           0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075);
+  VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f,
+           0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v2);
+  asm volatile("vdiv.vv v2, v4, v6, v0.t");
+  VCMP_I16(6, v2, 0, 0x0000, 0, 0x0000, 0, 0x0002, 0, 0x0003, 0, 0xffff, 0,
+           0x0001, 0, 0xfffa, 0, 0xfffc);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2,
+           0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2,
+           0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1,
+           0x0ceff9c0);
+  VLOAD_32(v12, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b,
+           0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3,
+           0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d,
+           0xffffffba);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v4);
+  asm volatile("vdiv.vv v4, v8, v12, v0.t");
+  VCMP_I32(7, v4, 0, 0xfdaec51b, 0, 0xfef2bd11, 0, 0xff872740, 0, 0xff3d81e7, 0,
+           0x00aa3ed6, 0, 0xfeb7d87b, 0, 0x01061804, 0, 0xffd0afa2);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac,
+           0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6,
+           0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb,
+           0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b,
+           0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42,
+           0x63a8eaa417e6d29c);
+  VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e,
+           0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c,
+           0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc,
+           0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791,
+           0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027,
+           0x000000175355cab3);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v8);
+  asm volatile("vdiv.vv v8, v16, v24, v0.t");
+  VCMP_I64(8, v8, 0, 0xffffffffffe7dac0, 0, 0x00000000014f3850, 0,
+           0x0000000000fa75a9, 0, 0x0000000001baa14f, 0, 0xfffffffffdad470e, 0,
+           0x00000000015c87ee, 0, 0xffffffffff3a4e84, 0, 0x000000000445c6cf);
+}
+
+void TEST_CASE3(void) {  // unmasked vdiv.vx; divisors 5, -538, 649, -59223
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14,
+          0x59, 0x98, 0xda, 0x76, 0x84);
+  int64_t scalar = 5;
+  asm volatile("vdiv.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_I8(9, v1, 0x04, 0xef, 0x0f, 0xf6, 0xf4, 0x18, 0xf1, 0xfd, 0xf4, 0xea,
+          0x04, 0x11, 0xec, 0xf9, 0x17, 0xe8);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e,
+           0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6);
+  scalar = -538;
+  asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_I16(10, v2, 0xfff4, 0x0039, 0xffe9, 0x0014, 0x001f, 0x0031, 0xfff3,
+           0xffd4, 0x0035, 0xffe8, 0x0033, 0x0008, 0xffc5, 0x0006, 0x0028,
+           0xffdc);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab,
+           0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e,
+           0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e,
+           0xcda97e6d);
+  scalar = 649;
+  asm volatile("vdiv.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_I32(11, v4, 0x001d22cc, 0xfffe77b2, 0x002eb818, 0x00257573, 0x00263435,
+           0x002a8f5a, 0x002befac, 0xfffa58e4, 0x00184c36, 0xffe3db85,
+           0xffcf1946, 0x002877d8, 0x0015db3d, 0xffceabd8, 0xfffbfd39,
+           0xffec24e2);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9,
+           0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc,
+           0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8,
+           0x3a859a64dbdb137e, 0x46674604f440792d, 0x04b1df734a3f312a,
+           0xde91f735ce81d174, 0x3d254eb16d0c87f4, 0xc06ebbe7936e6774,
+           0xb17ccbc475c8724e);
+  scalar = -59223;
+  asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_I64(12, v8, 0x000013383ad25844, 0xffff9b84f9ef594c, 0xffffa48eb726f738,
+           0x000088f40e45bbd1, 0x0000299522c72a62, 0x00007fd16a16b1db,
+           0xffff9d8efec5cf15, 0x000053554738ae55, 0xffffeba7c8cdd664,
+           0xffffbf3d66c69bdf, 0xffffb2177f70bf18, 0xffffface02d012e3,
+           0x000024fe4bc4a5dc, 0xffffbc56186f1f3d, 0x00004657f2ee1ea3,
+           0x000056e1b88b70d9);
+}
+
+void TEST_CASE4(void) {  // masked vdiv.vx: only odd elements are written
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14,
+          0x59, 0x98, 0xda, 0x76, 0x84);
+  int64_t scalar = 5;
+  VLOAD_8(v0, 0xAA, 0xAA);  // mask bits 0b10101010: odd elements active
+  VCLEAR(v1);
+  asm volatile("vdiv.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I8(13, v1, 0, 0xef, 0, 0xf6, 0, 0x18, 0, 0xfd, 0, 0xea, 0, 0x11, 0, 0xf9,
+          0, 0xe8);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e,
+           0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6);
+  scalar = -538;
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v2);
+  asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I16(14, v2, 0, 0x0039, 0, 0x0014, 0, 0x0031, 0, 0xffd4, 0, 0xffe8, 0,
+           0x0008, 0, 0x0006, 0, 0xffdc);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab,
+           0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e,
+           0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e,
+           0xcda97e6d);
+  scalar = 649;
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v4);
+  asm volatile("vdiv.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I32(15, v4, 0, 0xfffe77b2, 0, 0x00257573, 0, 0x002a8f5a, 0, 0xfffa58e4,
+           0, 0xffe3db85, 0, 0x002877d8, 0, 0xffceabd8, 0, 0xffec24e2);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9,
+           0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc,
+           0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8,
+           0x3a859a64dbdb137e, 0x46674604f440792d, 0x04b1df734a3f312a,
+           0xde91f735ce81d174, 0x3d254eb16d0c87f4, 0xc06ebbe7936e6774,
+           0xb17ccbc475c8724e);
+  scalar = -59223;
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v8);
+  asm volatile("vdiv.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I64(16, v8, 0, 0xffff9b84f9ef594c, 0, 0x000088f40e45bbd1, 0,
+           0x00007fd16a16b1db, 0, 0x000053554738ae55, 0, 0xffffbf3d66c69bdf, 0,
+           0xffffface02d012e3, 0, 0xffffbc56186f1f3d, 0, 0x000056e1b88b70d9);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c
new file mode 100644
index 000000000..22ae19a22
--- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + 0xab, 0xc2, 0xff, 0xf5, 0xc2); + VLOAD_8(v3, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + 0x1c, 0x24, 0x1a, 0x22, 0x01); + asm volatile("vdivu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x02, 0x04, 0x03, 0x00, 0x46, 0x05, 0x01, 0x04, 0x00, 0x10, + 0x03, 0x06, 0x05, 0x09, 0x07, 0xc2); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); + VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); + asm volatile("vdivu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x0039, 0x00de, 0x048d, 0x0014, 0x00ac, 0x01ca, 0x0046, + 0x007b, 0x00ea, 0x0079, 0x00b5, 0x000e, 0x0012, 0x00e9, 0x0105, + 0x0056); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, + 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, + 0x3fa1cc84); + VLOAD_32(v12, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, + 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, + 0x00000007); + asm volatile("vdivu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x05c4a4c3, 0x002fb5c5, 0x03b21eb4, 0x0201dd84, 0x00f20682, + 0x0584ebef, 0x08024d0c, 0x00105098, 0x05973090, 0x1d27c42c, + 0x07694c50, 0x02149d19, 0x00b06fa5, 0x002a6c0b, 0x01b0bdcc, + 0x09171d37); + + VSET(16, e64, m8); 
+ VLOAD_64(v16, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + 0x195bedcb9ce5956b, 0x41ce2b35b4280568, 0x32e5b89eed21de3b, + 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, + 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, + 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, + 0x776410b4b0cc22ad); + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, + 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, + 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, + 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, + 0x000000000003d0e4); + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x00024ccd25dd5faf, 0x000054ac6a930494, 0x00002ebddee9df57, + 0x000023fac7321f85, 0x0000483f73b2e3e2, 0x000016114f5d8a9e, + 0x000024c26869df0e, 0x00000765470f410f, 0x000005e5de9b769d, + 0x0000472988fa89c3, 0x000011de6d57a394, 0x00000815e7b8df73, + 0x00004d64ede3b4a6, 0x00001bec0e79307a, 0x0000174af5139f58, + 0x00001f497ec0ff30); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + 0xab, 0xc2, 0xff, 0xf5, 0xc2); + VLOAD_8(v3, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + 0x1c, 0x24, 0x1a, 0x22, 0x01); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdivu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x04, 0, 0x00, 0, 0x05, 0, 0x04, 0, 0x10, 0, 0x06, 0, 0x09, + 0, 0xc2); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); + VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdivu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x00de, 0, 0x0014, 0, 0x01ca, 0, 
0x007b, 0, 0x0079, 0, + 0x000e, 0, 0x00e9, 0, 0x0056); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, + 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, + 0x3fa1cc84); + VLOAD_32(v12, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, + 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, + 0x00000007); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdivu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x002fb5c5, 0, 0x0201dd84, 0, 0x0584ebef, 0, 0x00105098, 0, + 0x1d27c42c, 0, 0x02149d19, 0, 0x002a6c0b, 0, 0x09171d37); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + 0x195bedcb9ce5956b, 0x41ce2b35b4280568, 0x32e5b89eed21de3b, + 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, + 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, + 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, + 0x776410b4b0cc22ad); + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, + 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, + 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, + 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, + 0x000000000003d0e4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x000054ac6a930494, 0, 0x000023fac7321f85, 0, + 0x000016114f5d8a9e, 0, 0x00000765470f410f, 0, 0x0000472988fa89c3, 0, + 0x00000815e7b8df73, 0, 0x00001bec0e79307a, 0, 0x00001f497ec0ff30); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + 0xfc, 0xa9, 0x8e, 0x67, 0x49); + uint64_t scalar = 5; + asm volatile("vdivu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + 
VCMP_I8(9, v1, 0x03, 0x25, 0x02, 0x11, 0x1c, 0x01, 0x21, 0x00, 0x0f, 0x2e, + 0x2a, 0x32, 0x21, 0x1c, 0x14, 0x0e); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); + scalar = 538; + asm volatile("vdivu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x0062, 0x0005, 0x0009, 0x000b, 0x002c, 0x0047, 0x006a, + 0x0027, 0x0055, 0x0004, 0x001c, 0x0043, 0x0021, 0x004f, 0x0028, + 0x0035); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, + 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, + 0xf94d6b63); + scalar = 649; + asm volatile("vdivu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x002786be, 0x00591abc, 0x0012e87f, 0x005af1c8, 0x003c7480, + 0x00294b2b, 0x0003cd68, 0x0006e722, 0x00351b13, 0x00015108, + 0x004d3723, 0x003780a6, 0x0020cf84, 0x0045b92a, 0x0035d049, + 0x0062568c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, + 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, + 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, + 0x8679a27b237a032e, 0x7e0881a487bbb235, 0x17d97d9849271cec, + 0x1c85ac87ba3c7d1e); + scalar = 9223; + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000340383152452b, 0x000439f263aaf34a, 0x000569521e089c7c, + 0x0002370079144c76, 0x00016f07b37c5546, 0x00034d65d36c535c, + 0x0005b5e194247d88, 0x00056d3090f69ef0, 0x000419a3026cfde7, + 0x0003b6ebd974c870, 0x000232398140d5dd, 0x0005d8bb7bec2e99, + 0x0003bb8ab6abb03a, 0x00037f8e5aab0783, 0x0000a977deb32c78, + 0x0000caab9b4a8885); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + 0xfc, 0xa9, 0x8e, 0x67, 
0x49); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdivu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x25, 0, 0x11, 0, 0x01, 0, 0x00, 0, 0x2e, 0, 0x32, 0, 0x1c, + 0, 0x0e); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); + scalar = 538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0x0005, 0, 0x000b, 0, 0x0047, 0, 0x0027, 0, 0x0004, 0, + 0x0043, 0, 0x004f, 0, 0x0035); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, + 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, + 0xf94d6b63); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdivu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00591abc, 0, 0x005af1c8, 0, 0x00294b2b, 0, 0x0006e722, + 0, 0x00015108, 0, 0x003780a6, 0, 0x0045b92a, 0, 0x0062568c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, + 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, + 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, + 0x8679a27b237a032e, 0x7e0881a487bbb235, 0x17d97d9849271cec, + 0x1c85ac87ba3c7d1e); + scalar = 9223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x000439f263aaf34a, 0, 0x0002370079144c76, 0, + 0x00034d65d36c535c, 0, 0x00056d3090f69ef0, 0, 0x0003b6ebd974c870, 0, + 0x0005d8bb7bec2e99, 0, 0x00037f8e5aab0783, 0, 0x0000caab9b4a8885); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c new file mode 100644 index 000000000..d2fb46a39 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c @@ -0,0 +1,449 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Notes: hard to check if FS is Dirtied by the first vector FP instruction +// since it is not accessible in U mode and it is dirtied before the first vfp +// operation + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + asm volatile("vfadd.vv v2, v4, v6"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(1, v2, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, + 0x800f); + + VSET(16, e32, m4); + // -0.28968573, 0.40292332, 0.33936000, 0.53889370, 0.39942014, + // -0.27004066, 0.78120714, -0.15632398, -0.49984047, + // -0.69259918, -0.03384063, -0.62385744, 0.00338853, 0.33711585, + // -0.34673852, 0.11450682 + VLOAD_32(v8, 0xbe9451b0, 0x3ece4bf7, 
0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + // -0.62142891, 0.63306540, 0.26511025, 0.85738784, + // -0.78492641, -0.44331804, -0.84668529, 0.13981950, 0.84909225, + // 0.23569171, 0.34283128, 0.56619811, 0.22596644, 0.55843508, + // 0.53194439, 0.02510819 + VLOAD_32(v12, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, + 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, + 0x3ccdafb0); + asm volatile("vfadd.vv v4, v8, v12"); + // -0.91111463, 1.03598869, 0.60447025, 1.39628148, + // -0.38550627, -0.71335870, -0.06547815, -0.01650448, 0.34925178, + // -0.45690745, 0.30899066, -0.05765933, 0.22935496, 0.89555097, + // 0.18520588, 0.13961500 + VCMP_U32(2, v4, 0xbf693ecf, 0x3f849b47, 0x3f1abe90, 0x3fb2b95a, 0xbec56114, + 0xbf369ead, 0xbd861968, 0xbc873468, 0x3eb2d121, 0xbee9efc6, + 0x3e9e3406, 0xbd6c2c30, 0x3e6adc07, 0x3f6542d4, 0x3e3da69c, + 0x3e0ef73c); + + VSET(16, e64, m8); + // -0.1192486190170796, 0.7099687505713703, -0.6001652243371716, + // -0.9559723926483070, 0.7987976623002717, -0.3314459653039117, + // 0.7678805321182058, -0.3118871679402779, -0.7580588930783800, + // 0.5940681950113129, 0.6471754222100761, 0.4175915562917139, + // -0.3690504607938143, 0.0740574148132984, -0.1493616685664843, + // 0.3560295367616439 + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, + 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, + 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, + 0xbfd79e85d2ebb8f0, 0x3fb2f56d3ea64090, 0xbfc31e487ce26ff0, + 0x3fd6c9301c334858); + // -0.7765903295164327, 0.4195489676706889, -0.3911414124398265, + // 0.6922029856623244, 0.5664741772288600, -0.1412820433489181, + // -0.1847941224896075, -0.4907136082532593, 
-0.9146160877742129, + // -0.7130864084314152, -0.5516927493459973, -0.4203081001100177, + // 0.6487326796833275, -0.5631384800254344, -0.0996872955425372, + // -0.4382844162164241 + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + asm volatile("vfadd.vv v8, v16, v24"); + // -0.8958389485335123, 1.1295177182420593, -0.9913066367769980, + // -0.2637694069859826, 1.3652718395291317, -0.4727280086528298, + // 0.5830864096285984, -0.8026007761935372, -1.6726749808525929, + // -0.1190182134201023, 0.0954826728640787, -0.0027165438183039, + // 0.2796822188895132, -0.4890810652121360, -0.2490489641090214, + // -0.0822548794547802 + VCMP_U64(3, v8, 0xbfecaab6714de71e, 0x3ff212812bc1a28f, 0xbfefb8c8b2287a88, + 0xbfd0e199142c2ac0, 0x3ff5d82748ced68d, 0xbfde412cfa444b84, + 0x3fe2a8a4d48319b8, 0xbfe9aee7d2afdaca, 0xbffac346d73996c9, + 0xbfbe77fa46448730, 0x3fb8718d6d492fc0, 0xbf6641015b72d200, + 0x3fd1e6503fd9cd94, 0xbfdf4d1aab0b7434, 0xbfcfe0d621f9b7e8, + 0xbfb50ea7e131d810); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0, 0x3501, 0, 0xab30, 0, 0x3170, 0, 0x34c8, 0, 0xba74, 0, + 0x3974, 0, 0x303e, 0, 0x800f); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 
0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + VLOAD_32(v12, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, + 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, + 0x3ccdafb0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0, 0x3f849b47, 0, 0x3fb2b95a, 0, 0xbf369ead, 0, 0xbc873468, 0, + 0xbee9efc6, 0, 0xbd6c2c30, 0, 0x3f6542d4, 0, 0x3e0ef73c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, + 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, + 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, + 0xbfd79e85d2ebb8f0, 0x3fb2f56d3ea64090, 0xbfc31e487ce26ff0, + 0x3fd6c9301c334858); + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0, 0x3ff212812bc1a28f, 0, 0xbfd0e199142c2ac0, 0, + 0xbfde412cfa444b84, 0, 0xbfe9aee7d2afdaca, 0, 0xbfbe77fa46448730, 0, + 0xbf6641015b72d200, 0, 0xbfdf4d1aab0b7434, 0, 0xbfb50ea7e131d810); +}; + +// Edge-case tests +void TEST_CASE3(void) { + VSET(16, e16, m2); + VLOAD_16(v4, pInfh, pInfh, mInfh, qNaNh, pMaxh, pMaxh, pZero, mZeroh, pZero, + pMaxh, pZero, qNaNh, mInfh, pInfh, qNaNh, qNaNh); + VLOAD_16(v6, mInfh, pInfh, mInfh, pZero, pMaxh, mMaxh, pZero, mZeroh, mZeroh, + mZeroh, mMaxh, 0x1, 0xba88, pZero, qNaNh, 0xba88); + asm volatile("vfadd.vv v2, v4, v6"); + 
VCMP_U16(7, v2, qNaNh, pInfh, mInfh, qNaNh, pInfh, pZero, pZero, mZeroh, + pZero, pMaxh, mMaxh, qNaNh, mInfh, pInfh, qNaNh, qNaNh); + + VSET(16, e32, m4); + VLOAD_32(v8, pInff, pInff, mInff, qNaNf, pMaxf, pMaxf, pZero, mZerof, pZero, + pMaxf, pZero, qNaNf, mInff, pInff, qNaNf, qNaNf); + VLOAD_32(v12, mInff, pInff, mInff, pZero, pMaxf, mMaxf, pZero, mZerof, mZerof, + mZerof, mMaxf, 0x1, 0xbf48f0f0, pZero, qNaNf, 0xbf48f0f0); + asm volatile("vfadd.vv v4, v8, v12"); + VCMP_U32(8, v4, qNaNf, pInff, mInff, qNaNf, pInff, pZero, pZero, mZerof, + pZero, pMaxf, mMaxf, qNaNf, mInff, pInff, qNaNf, qNaNf); + + VSET(16, e64, m8); + VLOAD_64(v16, pInfd, pInfd, mInfd, qNaNd, pMaxd, pMaxd, pZero, mZerod, pZero, + pMaxd, pZero, qNaNd, mInfd, pInfd, qNaNd, qNaNd); + VLOAD_64(v24, mInfd, pInfd, mInfd, pZero, pMaxd, mMaxd, pZero, mZerod, mZerod, + mZerod, mMaxd, 0x1, 0xbfd90875fda29450, pZero, qNaNd, + 0xbfd90875fda29450); + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(9, v8, qNaNd, pInfd, mInfd, qNaNd, pInfd, pZero, pZero, mZerod, + pZero, pMaxd, mMaxd, qNaNd, mInfd, pInfd, qNaNd, qNaNd); +}; + +// Imprecise exceptions +// If the check is done immediately after the vector instruction, it fails as it +// is completed before the "faulty" operations are executed by Ara's FPU +void TEST_CASE4(void) { + // Overflow + Inexact + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); + VLOAD_16(v6, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); + asm volatile("vfadd.vv v2, v4, v6"); + VCMP_U16(10, v2, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, + pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh); + CHECK_FFLAGS(OF | NX); + + // Invalid operation, overflow + CLEAR_FFLAGS; + VSET(16, e32, m4); + CHECK_FFLAGS(0); + VLOAD_32(v8, pInff, pInff, pInff, pInff, pInff, pInff, pInff, 
pInff, pInff, + pInff, pInff, pInff, pInff, pInff, pInff, pInff); + VLOAD_32(v12, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, + mInff, mInff, mInff, mInff, mInff, mInff, mInff); + asm volatile("vfadd.vv v4, v8, v12"); + VCMP_U32(11, v4, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, + qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf); + CHECK_FFLAGS(NV); + + // Invalid operation, overflow, inexact + CLEAR_FFLAGS; + VSET(16, e64, m8); + CHECK_FFLAGS(0); + VLOAD_64(v16, pMaxd, pInfd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, + pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); + VLOAD_64(v24, pMaxd, mInfd, 8000000000000001, pMaxd, pMaxd, pMaxd, pMaxd, + pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(12, v8, pInfd, qNaNd, pMaxd, pInfd, pInfd, pInfd, pInfd, pInfd, + pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd); + CHECK_FFLAGS(NV | OF | NX); +}; + +// Different rounding-mode + Back-to-back rm change and vfp operation +// Index 12 (starting from 0) rounds differently for RNE and RTZ +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + CHANGE_RM(RM_RTZ); + asm volatile("vfadd.vv v2, v4, v6"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(13, v2, 0xbed3, 0x3501, 0x3439, 
0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a3, 0x303e, 0xb177, + 0x800f); + + VSET(16, e16, m4); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v8, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v12, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + CHANGE_RM(RM_RNE); + asm volatile("vfadd.vv v4, v8, v12"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(14, v4, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, + 0x800f); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -0.1481, -0.1797, -0.5454, 0.3228, 0.3237, -0.7212, -0.5195, + // -0.4500, 0.2681, 0.7300, 0.5059, 0.5830, 0.3198, -0.1713, + // -0.6431, 0.4841 + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + asm volatile("vfadd.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -1.0859, -1.1172, -1.4834, -0.6152, -0.6143, -1.6592, + // -1.4570, -1.3877, -0.6699, -0.2080, -0.4321, -0.3550, + // -0.6182, -1.1094, -1.5811, -0.4539 + VCMP_U16(15, v2, 0xbc58, 0xbc78, 0xbdef, 0xb8ec, 0xb8ea, 0xbea3, 0xbdd4, + 0xbd8d, 0xb95c, 0xb2a8, 0xb6ea, 0xb5ae, 0xb8f2, 0xbc70, 0xbe53, + 0xb743); + + VSET(16, e32, m4); + // 0.86539453, -0.53925377, 
-0.47128764, 0.99265540, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.26464799, 0.77351445, -0.21725702, + // -0.25191557, -0.53123665, 0.80404943, 0.81841671 + VLOAD_32(v8, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + asm volatile("vfadd.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.09516734, -1.49981570, -1.43184948, 0.03209352, + // -0.63928008, -1.43391800, -1.26085043, -0.51662171, + // -1.68597102, -1.22520983, -0.18704742, -1.17781889, + // -1.21247745, -1.49179852, -0.15651244, -0.14214516 + VCMP_U32(16, v4, 0xbdc2e718, 0xbfbff9f6, 0xbfb746d8, 0x3d037480, 0xbf23a7dc, + 0xbfb78aa0, 0xbfa1638c, 0xbf044152, 0xbfd7cde6, 0xbf9cd3ad, + 0xbe3f895c, 0xbf96c2c5, 0xbf9b3276, 0xbfbef341, 0xbe2044cc, + 0xbe118e80); + + VSET(16, e64, m8); + // -0.3488917150781869, -0.4501495513738740, 0.8731197104152684, + // 0.3256432550932964, 0.6502591178769535, -0.3169358689246526, + // -0.5396694979141685, -0.5417807430937591, + // -0.7971574213160249, -0.1764794100111047, 0.3564275916066595, + // -0.3754449946313438, 0.6580947137446858, + // -0.3328857144699515, 0.1761214464164236, 0.1429774118511240 + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + asm volatile("vfadd.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.5619790110445508, 0.4607211747488638, 1.7839904365380062, + // 1.2365139812160342, 1.5611298439996912, 
0.5939348571980851, + // 0.3712012282085693, 0.3690899830289787, 0.1137133048067129, + // 0.7343913161116331, 1.2672983177293973, 0.5354257314913939, + // 1.5689654398674235, 0.5779850116527863, 1.0869921725391614, + // 1.0538481379738618 + VCMP_U64(17, v8, 0x3fe1fbbb682f314e, 0x3fdd7c74aa87bfa0, 0x3ffc8b398e54eb85, + 0x3ff3c8c2e265e9fc, 0x3ff8fa63498c9cb2, 0x3fe30183ac73d8ba, + 0x3fd7c1c2cbd9037c, 0x3fd79f2b9799008c, 0x3fbd1c50ad43d140, + 0x3fe7802237a54ebe, 0x3ff446da99cec6fa, 0x3fe1223524c62842, + 0x3ff91a7b814c8eb3, 0x3fe27eda6c540f88, 0x3ff16451e78104dd, + 0x3ff0dc8fdd78c58f); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE7(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(18, v2, 0, 0xbc78, 0, 0xb8ec, 0, 0xbea3, 0, 0xbd8d, 0, 0xb2a8, 0, + 0xb5ae, 0, 0xbc70, 0, 0xb743); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(19, v4, 0, 0xbfbff9f6, 0, 0x3d037480, 0, 0xbfb78aa0, 0, 0xbf044152, + 0, 0xbf9cd3ad, 0, 0xbf96c2c5, 0, 0xbfbef341, 0, 0xbe118e80); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 
0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(20, v8, 0, 0x3fdd7c74aa87bfa0, 0, 0x3ff3c8c2e265e9fc, 0, + 0x3fe30183ac73d8ba, 0, 0x3fd79f2b9799008c, 0, 0x3fe7802237a54ebe, 0, + 0x3fe1223524c62842, 0, 0x3fe27eda6c540f88, 0, 0x3ff0dc8fdd78c58f); +}; + +// Raise exceptions only on active elements! +void TEST_CASE8(void) { + // Overflow and Inexact. Invalid operation should not be raised. + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, + pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh); + VLOAD_16(v6, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, + pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vv v2, v4, v6, v0.t"); + VCMP_U16(21, v2, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, + 0, pInfh, 0, pInfh); + CHECK_FFLAGS(OF | NX); + + // Invalid operation. Overflow and Inexact should not be raised. 
+ CLEAR_FFLAGS; + VSET(16, e32, m4); + CHECK_FFLAGS(0); + VLOAD_32(v8, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, + pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff); + VLOAD_32(v12, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, + mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vv v4, v8, v12, v0.t"); + VCMP_U32(22, v4, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, + 0, qNaNf, 0, qNaNf); + CHECK_FFLAGS(NV); + + // No exception should be raised + CLEAR_FFLAGS; + VSET(16, e64, m8); + CHECK_FFLAGS(0); + VLOAD_64(v16, pMaxd, 0, pInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + pMaxd, 0, pMaxd, 0); + VLOAD_64(v24, pMaxd, 0, mInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + pMaxd, 0, pMaxd, 0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(23, v8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c new file mode 100644 index 000000000..63f9b973a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c @@ -0,0 +1,90 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Test all the different output possibilities +void TEST_CASE1(void) { + CLEAR_FFLAGS; + CHECK_FFLAGS(0); + + VSET(16, e16, m2); + VLOAD_16(v4, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xc767, pZero, mZeroh, + 0x8075, 0x00c5, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xb767); + asm volatile("vfclass.v v2, v4"); + VCMP_U16(1, v2, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); + + VSET(16, e32, m4); + VLOAD_32(v8, mInff, pInff, qNaNf, sNaNf, 0x3f738772, 0xbdef32e4, pZero, + mZerof, 0x80000075, 0x000000c5, mInff, pInff, qNaNf, sNaNf, + 0x3f738772, 0xbdef32e4); + asm volatile("vfclass.v v4, v8"); + VCMP_U32(2, v4, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); + + VSET(16, e64, m8); + VLOAD_64(v16, mInfd, pInfd, qNaNd, sNaNd, 0x3def3136584672de, + 0xbdef3136584672de, pZero, mZerod, 0x8000000000000075, + 0x0000000000000c5, mInfd, pInfd, qNaNd, sNaNd, 0x3def313584672de4, + 0xbdef313654672de4); + asm volatile("vfclass.v v8, v16"); + VCMP_U64(3, v8, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); +}; + +// Test all the different output possibilities +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xc767, pZero, mZeroh, + 0x8075, 0x00c5, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xb767); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfclass.v v2, v4, v0.t"); + VCMP_U16(4, v2, 0, CLASS_pInf, 0, CLASS_sNAN, 0, 
CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + + VSET(16, e32, m4); + VLOAD_32(v8, mInff, pInff, qNaNf, sNaNf, 0x3f738772, 0xbdef32e4, pZero, + mZerof, 0x80000075, 0x000000c5, mInff, pInff, qNaNf, sNaNf, + 0x3f738772, 0xbdef32e4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfclass.v v4, v8, v0.t"); + VCMP_U32(5, v4, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + + VSET(16, e64, m8); + VLOAD_64(v16, mInfd, pInfd, qNaNd, sNaNd, 0x3def313584672de4, + 0xbdef313658467de4, pZero, mZerod, 0x8000000000000075, + 0x0000000000000c5, mInfd, pInfd, qNaNd, sNaNd, 0x3def313658672de4, + 0xbdef313654672de4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfclass.v v8, v16, v0.t"); + VCMP_U64(6, v8, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + // No exception should be raised by vfclass.v + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c new file mode 100644 index 000000000..64aea6e58 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c @@ -0,0 +1,834 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// We assume RNE rounding when not specified by the encoding + +//////////////// +// vfcvt.xu.f // +//////////////// + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, + // 9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, + // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 + VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); + asm volatile("vfcvt.xu.f.v v6, v4"); + // 0, 2978, 0, 0, 0, + // 9680, 76, 0, 0, 0, + // 8672, 8824, 0, 0, 0, + // 0 + VCMP_U16(1, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x004c, + 0x0000, 0x0000, 0x0000, 0x21e0, 0x2278, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, + // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, + // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, + // 42708.285 + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, + 0x47b5515b, 0xc6db636c, 0xc617d206, 0x47742c82, 0x47b09571, + 0x4726d449); + asm volatile("vfcvt.xu.f.v v12, v8"); + // 0, 0, 0, 53784, 76501, + // 65152, 0, 71894, 0, 95486, + // 92835, 0, 0, 62509, + // 90411, 42708 + VCMP_U32(2, v12, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00012ad5, + 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, + 0x00016aa3, 0x00000000, 0x00000000, 0x0000f42d, 0x0001612b, + 0x0000a6d4); + + VSET(16, e64, m8); + // 3554390.405, 3670449.443, 3880983.535, 3452087.537, + // -5447847.496, 498812.179, 9535291.051, 113884.868, + // 2124622.198, -2164534.614, 1377445.305, -2114478.485, + // -4704971.356, -7866057.432, 7002504.380, -2981734.692 + VLOAD_64(v16, 0x414b1e2b33d13be4, 
0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, + 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, + 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, + 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, + 0xc146bfb358869da2); + asm volatile("vfcvt.xu.f.v v24, v16"); + // 3554390, 3670449, 3880984, + // 3452088, 0, 498812, + // 9535291, 113885, 2124622, 0, + // 1377445, 0, 0, 0, 7002504, + // 0 + VCMP_U64(3, v24, 0x0000000000363c56, 0x00000000003801b1, 0x00000000003b3818, + 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, + 0x0000000000917f3b, 0x000000000001bcdd, 0x0000000000206b4e, + 0x0000000000000000, 0x00000000001504a5, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x00000000006ad988, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, + // 9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, + // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 + VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.xu.f.v v6, v4, v0.t"); + // 0, 2978, 0, 0, 0, + // 9680, 0, 0, 0, 0, + // 0, 8824, 0, 0, 0, 0 + VCMP_U16(4, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x2278, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, + // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, + // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, + // 42708.285 + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, + 0x47b5515b, 
0xc6db636c, 0xc617d206, 0x47742c82, 0x47b09571, + 0x4726d449); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.xu.f.v v12, v8, v0.t"); + // 0, 0, 0, 53784, 0, + // 65152, 0, 71894, 0, 95486, + // 0, 0, 0, 62509, 0, + // 42708 + VCMP_U32(5, v12, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00000000, + 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, + 0x00000000, 0x00000000, 0x00000000, 0x0000f42d, 0x00000000, + 0x0000a6d4); + + VSET(16, e64, m8); + // 3554390.405, 3670449.443, 3880983.535, 3452087.537, + // -5447847.496, 498812.179, 9535291.051, 113884.868, + // 2124622.198, -2164534.614, 1377445.305, -2114478.485, + // -4704971.356, -7866057.432, 7002504.380, -2981734.692 + VLOAD_64(v16, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, + 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, + 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, + 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, + 0xc146bfb358869da2); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.xu.f.v v24, v16, v0.t"); + // 0, 3670449, 0, 3452088, 0, + // 498812, 0, 113885, + // 0, 0, 0, 0, 0, 0, 0, + // 0 + VCMP_U64(6, v24, 0x0000000000000000, 0x00000000003801b1, 0x0000000000000000, + 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, + 0x0000000000000000, 0x000000000001bcdd, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000); +}; + +/////////////// +// vfcvt.x.f // +/////////////// + +// Unmasked vfcvt.x.f.v +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, + // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, + // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, + // -8344.000 + VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + 0x6caa, 
0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); + asm volatile("vfcvt.x.f.v v6, v4"); + // 4144, -862, -8680, 3792, 8800, + // -2330, -3066, -6148, 4776, 7360, + // -7880, -1843, -7896, -6520, -7092, + // -8344 + VCMP_U16(7, v6, 0x1030, 0xfca2, 0xde18, 0x0ed0, 0x2260, 0xf6e6, 0xf406, + 0xe7fc, 0x12a8, 0x1cc0, 0xe138, 0xf8cd, 0xe128, 0xe688, 0xe44c, + 0xdf68); + + VSET(16, e32, m4); + // -28075.818, -5455.616, 6106.086, -11952.592, -50887.914, + // -23028.832, -9221.246, -71657.047, -6655.005, -21208.561, + // -30018.096, -19766.838, 48541.953, -62313.625, 13515.192, + // -83224.820 + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, + 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, + 0xc7a28c69); + asm volatile("vfcvt.x.f.v v12, v8"); + // -28076, -5456, 6106, -11953, + // -50888, -23029, -9221, -71657, -6655, + // -21209, -30018, -19767, 48542, -62314, + // 13515, -83225 + VCMP_U32(8, v12, 0xffff9254, 0xffffeab0, 0x000017da, 0xffffd14f, 0xffff3938, + 0xffffa60b, 0xffffdbfb, 0xfffee817, 0xffffe601, 0xffffad27, + 0xffff8abe, 0xffffb2c9, 0x0000bd9e, 0xffff0c96, 0x000034cb, + 0xfffebae7); + + VSET(16, e64, m8); + // 3087905.033, -2534011.630, 7824302.813, + // -9294206.521, 6436555.847, 6645117.193, + // 1358075.867, 5694551.012, -9840938.636, + // 4621816.383, 2584370.751, 5569558.860, + // 495487.041, 4759865.418, -6831172.669, + // 8371055.296 + VLOAD_64(v16, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, + 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, + 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, + 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, + 0x415feedbd2ed6038); + asm volatile("vfcvt.x.f.v v24, v16"); + // 3087905, -2534012, 7824303, + // -9294207, 6436556, 6645117, + // 1358076, 5694551, -9840939, + // 4621816, 2584371, 5569559, + // 
495487, 4759865, -6831173, + // 8371055 + VCMP_U64(9, v24, 0x00000000002f1e21, 0xffffffffffd95584, 0x00000000007763af, + 0xffffffffff722e81, 0x00000000006236cc, 0x000000000065657d, + 0x000000000014b8fc, 0x000000000056e457, 0xffffffffff69d6d5, + 0x00000000004685f8, 0x0000000000276f33, 0x000000000054fc17, + 0x0000000000078f7f, 0x000000000048a139, 0xffffffffff97c3bb, + 0x00000000007fbb6f); +}; + +// Masked vfcvt.x.f.v +void TEST_CASE4(void) { + VSET(16, e16, m2); + // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, + // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, + // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, + // -8344.000 + VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + 0x6caa, 0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.x.f.v v6, v4, v0.t"); + // 0, -862, 0, 3792, 0, + // -2330, 0, -6148, 0, 7360, 0, + // -1843, 0, -6520, 0, -8344 + VCMP_U16(10, v6, 0x0000, 0xfca2, 0x0000, 0x0ed0, 0x0000, 0xf6e6, 0x0000, + 0xe7fc, 0x0000, 0x1cc0, 0x0000, 0xf8cd, 0x0000, 0xe688, 0x0000, + 0xdf68); + + VSET(16, e32, m4); + // -28075.818, -5455.616, 6106.086, -11952.592, + // -50887.914, -23028.832, -9221.246, -71657.047, + // -6655.005, -21208.561, -30018.096, -19766.838, 48541.953, + // -62313.625, 13515.192, -83224.820 + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, + 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, + 0xc7a28c69); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.x.f.v v12, v8, v0.t"); + // 0, -5456, 0, -11953, 0, + // -23029, 0, -71657, 0, + // -21209, 0, -19767, 0, + // -62314, 0, -83225 + VCMP_U32(11, v12, 0x00000000, 0xffffeab0, 0x00000000, 0xffffd14f, 0x00000000, + 0xffffa60b, 0x00000000, 0xfffee817, 0x00000000, 0xffffad27, + 0x00000000, 0xffffb2c9, 0x00000000, 0xffff0c96, 0x00000000, + 0xfffebae7); + + VSET(16, e64, 
m8); + // 3087905.033, -2534011.630, 7824302.813, -9294206.521, + // 6436555.847, 6645117.193, 1358075.867, 5694551.012, + // -9840938.636, 4621816.383, 2584370.751, 5569558.860, + // 495487.041, 4759865.418, -6831172.669, 8371055.296 + VLOAD_64(v16, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, + 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, + 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, + 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, + 0x415feedbd2ed6038); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.x.f.v v24, v16, v0.t"); + // 0, -2534012, 0, -9294207, + // 0, 6645117, 0, 5694551, 0, + // 4621816, 0, 5569559, + // 0, 4759865, 0, 8371055 + VCMP_U64(12, v24, 0x0000000000000000, 0xffffffffffd95584, 0x0000000000000000, + 0xffffffffff722e81, 0x0000000000000000, 0x000000000065657d, + 0x0000000000000000, 0x000000000056e457, 0x0000000000000000, + 0x00000000004685f8, 0x0000000000000000, 0x000000000054fc17, + 0x0000000000000000, 0x000000000048a139, 0x0000000000000000, + 0x00000000007fbb6f); +}; + +//////////////////// +// vfcvt.rtz.xu.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, + // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, + // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, + // 4608.000 + VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); + asm volatile("vfcvt.rtz.xu.f.v v6, v4"); + // 6996, 0, 0, 7240, 8336, + // 6332, 0, 0, 9352, 8832, 0, + // 5860, 6892, 2944, 9608, 4608 + VCMP_U16(13, v6, 0x1b54, 0x0000, 0x0000, 0x1c48, 0x2090, 0x18bc, 0x0000, + 0x0000, 0x2488, 0x2280, 0x0000, 0x16e4, 0x1aec, 0x0b80, 0x2588, + 0x1200); + + VSET(16, e32, m4); + // 85074.883, -2035.769, 67397.633, -57745.480, 
82113.172, + // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, + // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, + // 57572.980 + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, + 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, + 0x4760e4fb); + asm volatile("vfcvt.rtz.xu.f.v v12, v8"); + // 85074, 0, 67397, 0, 82113, + // 18415, 57859, 83291, 0, 43321, + // 94626, 0, 9604, 0, 94299, + // 57572 + VCMP_U32(14, v12, 0x00014c52, 0x00000000, 0x00010745, 0x00000000, 0x000140c1, + 0x000047ef, 0x0000e203, 0x0001455b, 0x00000000, 0x0000a939, + 0x000171a2, 0x00000000, 0x00002584, 0x00000000, 0x0001705b, + 0x0000e0e4); + + VSET(16, e64, m8); + // -5386285.220, -9081004.335, -9603879.062, -4621060.923, + // 2017661.058, 1106405.978, -2095853.299, 1911589.313, + // 4833261.528, 1291127.404, -9941577.120, 9259799.184, + // -8569693.727, 4926687.920, -7537625.130, -6328586.289 + VLOAD_64(v16, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, + 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, + 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, + 0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, + 0xc1582442928257b8); + asm volatile("vfcvt.rtz.xu.f.v v24, v16"); + // 0, 0, 0, 0, 2017661, + // 1106405, 0, 1911589, + // 4833261, 1291127, 0, + // 9259799, 0, 4926687, + // 0, 0 + VCMP_U64(15, v24, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000000001ec97d, 0x000000000010e1e5, + 0x0000000000000000, 0x00000000001d2b25, 0x000000000049bfed, + 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, + 0x0000000000000000, 0x00000000004b2cdf, 0x0000000000000000, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + 
// 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, + // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, + // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, + // 4608.000 + VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.rtz.xu.f.v v6, v4, v0.t"); + // 0, 0, 0, 7240, 0, + // 6332, 0, 0, 0, 8832, + // 0, 5860, 0, 2944, 0, + // 4608 + VCMP_U16(16, v6, 0x0000, 0x0000, 0x0000, 0x1c48, 0x0000, 0x18bc, 0x0000, + 0x0000, 0x0000, 0x2280, 0x0000, 0x16e4, 0x0000, 0x0b80, 0x0000, + 0x1200); + + VSET(16, e32, m4); + // 85074.883, -2035.769, 67397.633, -57745.480, 82113.172, + // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, + // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, + // 57572.980 + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, + 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, + 0x4760e4fb); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.rtz.xu.f.v v12, v8, v0.t"); + // 0, 0, 0, 0, 0, 18415, + // 0, 83291, 0, 43321, 0, 0, + // 0, 0, 0, 57572 + VCMP_U32(17, v12, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x000047ef, 0x00000000, 0x0001455b, 0x00000000, 0x0000a939, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000e0e4); + + VSET(16, e64, m8); + // -5386285.220, -9081004.335, -9603879.062, -4621060.923, + // 2017661.058, 1106405.978, -2095853.299, 1911589.313, + // 4833261.528, 1291127.404, -9941577.120, 9259799.184, + // -8569693.727, 4926687.920, -7537625.130, -6328586.289 + VLOAD_64(v16, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, + 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, + 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, + 
0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, + 0xc1582442928257b8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.rtz.xu.f.v v24, v16, v0.t"); + // 0, 0, 0, 0, 0, + // 1106405, 0, 1911589, + // 0, 1291127, 0, 9259799, 0, + // 4926687, 0, 0 + VCMP_U64(18, v24, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x000000000010e1e5, + 0x0000000000000000, 0x00000000001d2b25, 0x0000000000000000, + 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, + 0x0000000000000000, 0x00000000004b2cdf, 0x0000000000000000, + 0x0000000000000000); +}; + +/////////////////// +// vfcvt.rtz.x.f // +/////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, + // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, + // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, + // 3714.000 + VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); + asm volatile("vfcvt.rtz.x.f.v v6, v4"); + // 5116, 4640, 5720, 1316, 8104, + // 9952, 9400, -4120, -9368, 6076, + // 1782, -5332, -4284, -2878, -2752, + // 3714 + VCMP_U16(19, v6, 0x13fc, 0x1220, 0x1658, 0x0524, 0x1fa8, 0x26e0, 0x24b8, + 0xefe8, 0xdb68, 0x17bc, 0x06f6, 0xeb2c, 0xef44, 0xf4c2, 0xf540, + 0x0e82); + + VSET(16, e32, m4); + // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, + // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, + // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, + // -83328.680 + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, + 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, + 0xc7a2c057); + asm volatile("vfcvt.rtz.x.f.v v12, v8"); + // -31395, 38407, 39625, -19419, + // -77414, -96104, -8227, -45789, + // -74805, 
78266, 1635, -33150, 17428, + // -93694, 93592, -83328 + VCMP_U32(20, v12, 0xffff855d, 0x00009607, 0x00009ac9, 0xffffb425, 0xfffed19a, + 0xfffe8898, 0xffffdfdd, 0xffff4d23, 0xfffedbcb, 0x000131ba, + 0x00000663, 0xffff7e82, 0x00004414, 0xfffe9202, 0x00016d98, + 0xfffeba80); + + VSET(16, e64, m8); + // 1347922.217, 7326256.926, 2532328.150, -4365139.352, + // -3892733.643, -3401324.772, -2109243.969, 61221.157, + // -307581.498, -6001564.901, -1299579.664, -2048360.900, + // 3486773.936, -5491246.977, -2222467.648, 1432204.815 + VLOAD_64(v16, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, + 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, + 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, + 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, + 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, + 0x4135da8cd09570f8); + asm volatile("vfcvt.rtz.x.f.v v24, v16"); + // 1347922, 7326256, 2532328, + // -4365139, -3892733, -3401324, + // -2109243, 61221, -307581, + // -6001564, -1299579, -2048360, + // 3486773, -5491246, -2222467, + // 1432204 + VCMP_U64(21, v24, 0x0000000000149152, 0x00000000006fca30, 0x000000000026a3e8, + 0xffffffffffbd64ad, 0xffffffffffc49a03, 0xffffffffffcc1994, + 0xffffffffffdfd0c5, 0x000000000000ef25, 0xfffffffffffb4e83, + 0xffffffffffa46c64, 0xffffffffffec2b85, 0xffffffffffe0be98, + 0x0000000000353435, 0xffffffffffac35d2, 0xffffffffffde167d, + 0x000000000015da8c); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE7 +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, + // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, + // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, + // 3714.000 + VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm 
volatile("vfcvt.rtz.x.f.v v6, v4, v0.t"); + // 0, 4640, 0, 1316, 0, + // 9952, 0, -4120, 0, 6076, + // 0, -5332, 0, -2878, 0, + // 3714 + VCMP_U16(22, v6, 0x0000, 0x1220, 0x0000, 0x0524, 0x0000, 0x26e0, 0x0000, + 0xefe8, 0x0000, 0x17bc, 0x0000, 0xeb2c, 0x0000, 0xf4c2, 0x0000, + 0x0e82); + + VSET(16, e32, m4); + // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, + // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, + // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, + // -83328.680 + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, + 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, + 0xc7a2c057); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.rtz.x.f.v v12, v8, v0.t"); + // 0, 38407, 0, -19419, 0, + // -96104, 0, -45789, 0, 78266, + // 0, -33150, 0, -93694, 0, + // -83328 + VCMP_U32(23, v12, 0x00000000, 0x00009607, 0x00000000, 0xffffb425, 0x00000000, + 0xfffe8898, 0x00000000, 0xffff4d23, 0x00000000, 0x000131ba, + 0x00000000, 0xffff7e82, 0x00000000, 0xfffe9202, 0x00000000, + 0xfffeba80); + + VSET(16, e64, m8); + // 1347922.217, 7326256.926, 2532328.150, -4365139.352, + // -3892733.643, -3401324.772, -2109243.969, 61221.157, + // -307581.498, -6001564.901, -1299579.664, -2048360.900, + // 3486773.936, -5491246.977, -2222467.648, 1432204.815 + VLOAD_64(v16, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, + 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, + 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, + 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, + 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, + 0x4135da8cd09570f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.rtz.x.f.v v24, v16, v0.t"); + // 0, 7326256, 0, -4365139, + // 0, -3401324, 0, 61221, 0, + // -6001564, 0, + // -2048360, 0, + // -5491246, 0, 1432204 + VCMP_U64(24, v24, 
0x0000000000000000, 0x00000000006fca30, 0x0000000000000000, + 0xffffffffffbd64ad, 0x0000000000000000, 0xffffffffffcc1994, + 0x0000000000000000, 0x000000000000ef25, 0x0000000000000000, + 0xffffffffffa46c64, 0x0000000000000000, 0xffffffffffe0be98, + 0x0000000000000000, 0xffffffffffac35d2, 0x0000000000000000, + 0x000000000015da8c); +}; + +//////////////// +// vfcvt.f.xu // +//////////////// + +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 55973, 61786, 64322, 55940, 55857, + // 3425, 1068, 4246, 57901, 7342, + // 8693, 60988, 9047, 63358, 58389, + // 8076 + VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); + asm volatile("vfcvt.f.xu.v v6, v4"); + // 55968.000, 61792.000, 64320.000, 55936.000, 55872.000, + // 3424.000, 1068.000, 4248.000, 57888.000, 7344.000, + // 8696.000, 60992.000, 9048.000, 63360.000, 58400.000, + // 8076.000 + VCMP_U16(25, v6, 0x7ad5, 0x7b8b, 0x7bda, 0x7ad4, 0x7ad2, 0x6ab0, 0x642c, + 0x6c26, 0x7b11, 0x6f2c, 0x703f, 0x7b72, 0x706b, 0x7bbc, 0x7b21, + 0x6fe3); + + VSET(16, e32, m4); + // 72473, 4294949057, 50975, 4294915723, + // 4294876584, 4294895088, 24967, 34761, + // 83805, 68361, 49397, 51562, 24877, + // 4294942241, 4294909502, 42562 + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, + 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, + 0x0000a642); + asm volatile("vfcvt.f.xu.v v12, v8"); + // 72473.000, 4294949120.000, 50975.000, 4294915840.000, + // 4294876672.000, 4294895104.000, 24967.000, 34761.000, + // 83805.000, 68361.000, 49397.000, 51562.000, 24877.000, + // 4294942208.000, 4294909440.000, 42562.000 + VCMP_U32(26, v12, 0x478d8c80, 0x4f7fffb9, 0x47471f00, 0x4f7fff37, 0x4f7ffe9e, + 0x4f7ffee6, 0x46c30e00, 0x4707c900, 0x47a3ae80, 0x47858480, + 0x4740f500, 0x47496a00, 0x46c25a00, 0x4f7fff9e, 0x4f7fff1e, + 0x47264200); + + VSET(16, e64, m8); + // 
18446744073707704187, 18446744073702261660, 4325496, + // 3834488, 18446744073707063867, 18446744073706356425, + // 5215660, 18446744073707545423, 69532, + // 18446744073707444829, 4236283, 3402850, + // 18446744073708706866, 275183, 4230347, + // 18446744073704794800 + VLOAD_64(v16, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, + 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, + 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, + 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, + 0xffffffffffb76ab0); + asm volatile("vfcvt.f.xu.v v24, v16"); + // 18446744073707704320.000, 18446744073702260736.000, + // 4325496.000, 3834488.000, 18446744073707063296.000, + // 18446744073706356736.000, 5215660.000, + // 18446744073707544576.000, 69532.000, + // 18446744073707444224.000, 4236283.000, 3402850.000, + // 18446744073708707840.000, 275183.000, 4230347.000, + // 18446744073704794112.000 + VCMP_U64(27, v24, 0x43effffffffffc7a, 0x43effffffffff218, 0x4150801e00000000, + 0x414d413c00000000, 0x43effffffffffb41, 0x43effffffffff9e8, + 0x4153e56b00000000, 0x43effffffffffc2c, 0x40f0f9c000000000, + 0x43effffffffffbfb, 0x415028fec0000000, 0x4149f63100000000, + 0x43effffffffffe64, 0x4110cbbc00000000, 0x41502332c0000000, + 0x43effffffffff6ed); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE9 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 55973, 61786, 64322, 55940, 55857, + // 3425, 1068, 4246, 57901, 7342, + // 8693, 60988, 9047, 63358, 58389, + // 8076 + VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.f.xu.v v6, v4, v0.t"); + // 0.000, 61792.000, 0.000, 55936.000, 0.000, 3424.000, + // 0.000, 4248.000, 0.000, 7344.000, 0.000, 60992.000, + // 0.000, 63360.000, 0.000, 
8076.000 + VCMP_U16(28, v6, 0x0, 0x7b8b, 0x0, 0x7ad4, 0x0, 0x6ab0, 0x0, 0x6c26, 0x0, + 0x6f2c, 0x0, 0x7b72, 0x0, 0x7bbc, 0x0, 0x6fe3); + + VSET(16, e32, m4); + // 72473, 4294949057, 50975, 4294915723, + // 4294876584, 4294895088, 24967, 34761, + // 83805, 68361, 49397, 51562, 24877, + // 4294942241, 4294909502, 42562 + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, + 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, + 0x0000a642); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.f.xu.v v12, v8, v0.t"); + // 0.000, 4294949120.000, 0.000, 4294915840.000, 0.000, + // 4294895104.000, 0.000, 34761.000, 0.000, 68361.000, + // 0.000, 51562.000, 0.000, 4294942208.000, 0.000, + // 42562.000 + VCMP_U32(29, v12, 0x0, 0x4f7fffb9, 0x0, 0x4f7fff37, 0x0, 0x4f7ffee6, 0x0, + 0x4707c900, 0x0, 0x47858480, 0x0, 0x47496a00, 0x0, 0x4f7fff9e, 0x0, + 0x47264200); + + VSET(16, e64, m8); + // 18446744073707704187, 18446744073702261660, 4325496, + // 3834488, 18446744073707063867, 18446744073706356425, + // 5215660, 18446744073707545423, 69532, + // 18446744073707444829, 4236283, 3402850, + // 18446744073708706866, 275183, 4230347, + // 18446744073704794800 + VLOAD_64(v16, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, + 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, + 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, + 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, + 0xffffffffffb76ab0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.f.xu.v v24, v16, v0.t"); + // 0.000, 18446744073702260736.000, 0.000, 3834488.000, + // 0.000, 18446744073706356736.000, 0.000, + // 18446744073707544576.000, 0.000, 18446744073707444224.000, + // 0.000, 3402850.000, 0.000, 275183.000, 0.000, + // 18446744073704794112.000 + VCMP_U64(30, v24, 0x0, 0x43effffffffff218, 0x0, 
0x414d413c00000000, 0x0, + 0x43effffffffff9e8, 0x0, 0x43effffffffffc2c, 0x0, 0x43effffffffffbfb, + 0x0, 0x4149f63100000000, 0x0, 0x4110cbbc00000000, 0x0, + 0x43effffffffff6ed); +}; + +/////////////// +// vfcvt.f.x // +/////////////// + +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -4779, 465, 9893, -6763, -4072, + // 1612, -9552, 2426, 325, 7561, + // -8581, -1741, -8518, -4699, 3653, + // 9937 + VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); + asm volatile("vfcvt.f.x.v v6, v4"); + // -4780.000, 465.000, 9896.000, -6764.000, -4072.000, + // 1612.000, -9552.000, 2426.000, 325.000, 7560.000, + // -8584.000, -1741.000, -8520.000, -4700.000, 3652.000, + // 9936.000 + VCMP_U16(31, v6, 0xecab, 0x5f44, 0x70d5, 0xee9b, 0xebf4, 0x664c, 0xf0aa, + 0x68bd, 0x5d14, 0x6f62, 0xf031, 0xe6cd, 0xf029, 0xec97, 0x6b22, + 0x70da); + + VSET(16, e32, m4); + // -39422, 54262, 12833, -40266, + // -64918, 28317, 89178, 54320, -99922, + // -73005, 95070, -24716, 60663, 59516, + // 14865, 26328 + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 0xfffee2d3, + 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, + 0x000066d8); + asm volatile("vfcvt.f.x.v v12, v8"); + // -39422.000, 54262.000, 12833.000, -40266.000, -64918.000, + // 28317.000, 89178.000, 54320.000, -99922.000, -73005.000, + // 95070.000, -24716.000, 60663.000, 59516.000, 14865.000, + // 26328.000 + VCMP_U32(32, v12, 0xc719fe00, 0x4753f600, 0x46488400, 0xc71d4a00, 0xc77d9600, + 0x46dd3a00, 0x47ae2d00, 0x47543000, 0xc7c32900, 0xc78e9680, + 0x47b9af00, 0xc6c11800, 0x476cf700, 0x47687c00, 0x46684400, + 0x46cdb000); + + VSET(16, e64, m8); + // -8860682, 8064547, -5636078, + // -3712253, 8492493, 9839246, + // -8271278, -6234598, -4538479, + // 8807688, 5640899, 3839761, + // -1394518, -6118355, 1783927, + // 5819812 + VLOAD_64(v16, 0xffffffffff78cbf6, 
0x00000000007b0e23, 0xffffffffffaa0012, + 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, + 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, + 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, + 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, + 0x000000000058cda4); + asm volatile("vfcvt.f.x.v v24, v16"); + // -8860682.000, 8064547.000, -5636078.000, -3712253.000, + // 8492493.000, 9839246.000, -8271278.000, -6234598.000, + // -4538479.000, 8807688.000, 5640899.000, 3839761.000, + // -1394518.000, -6118355.000, 1783927.000, 5819812.000 + VCMP_U64(33, v24, 0xc160e68140000000, 0x415ec388c0000000, 0xc1557ffb80000000, + 0xc14c527e80000000, 0x416032b9a0000000, 0x4162c451c0000000, + 0xc15f8d6b80000000, 0xc157c87980000000, 0xc151501bc0000000, + 0x4160cca100000000, 0x415584b0c0000000, 0x414d4b8880000000, + 0xc135475600000000, 0xc15756f4c0000000, 0x413b387700000000, + 0x4156336900000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -4779, 465, 9893, -6763, -4072, + // 1612, -9552, 2426, 325, 7561, + // -8581, -1741, -8518, -4699, 3653, + // 9937 + VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.f.x.v v6, v4, v0.t"); + // 0.000, 465.000, 0.000, -6764.000, 0.000, 1612.000, + // 0.000, 2426.000, 0.000, 7560.000, 0.000, -1741.000, + // 0.000, -4700.000, 0.000, 9936.000 + VCMP_U16(34, v6, 0x0, 0x5f44, 0x0, 0xee9b, 0x0, 0x664c, 0x0, 0x68bd, 0x0, + 0x6f62, 0x0, 0xe6cd, 0x0, 0xec97, 0x0, 0x70da); + + VSET(16, e32, m4); + // -39422, 54262, 12833, -40266, + // -64918, 28317, 89178, 54320, -99922, + // -73005, 95070, -24716, 60663, 59516, + // 14865, 26328 + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 
0xfffee2d3, + 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, + 0x000066d8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.f.x.v v12, v8, v0.t"); + // 0.000, 54262.000, 0.000, -40266.000, 0.000, 28317.000, + // 0.000, 54320.000, 0.000, -73005.000, 0.000, + // -24716.000, 0.000, 59516.000, 0.000, 26328.000 + VCMP_U32(35, v12, 0x0, 0x4753f600, 0x0, 0xc71d4a00, 0x0, 0x46dd3a00, 0x0, + 0x47543000, 0x0, 0xc78e9680, 0x0, 0xc6c11800, 0x0, 0x47687c00, 0x0, + 0x46cdb000); + + VSET(16, e64, m8); + // -8860682, 8064547, -5636078, + // -3712253, 8492493, 9839246, + // -8271278, -6234598, -4538479, + // 8807688, 5640899, 3839761, + // -1394518, -6118355, 1783927, + // 5819812 + VLOAD_64(v16, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, + 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, + 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, + 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, + 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, + 0x000000000058cda4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.f.x.v v24, v16, v0.t"); + // 0.000, 8064547.000, 0.000, -3712253.000, 0.000, + // 9839246.000, 0.000, -6234598.000, 0.000, 8807688.000, + // 0.000, 3839761.000, 0.000, -6118355.000, 0.000, + // 5819812.000 + VCMP_U64(36, v24, 0x0, 0x415ec388c0000000, 0x0, 0xc14c527e80000000, 0x0, + 0x4162c451c0000000, 0x0, 0xc157c87980000000, 0x0, 0x4160cca100000000, + 0x0, 0x414d4b8880000000, 0x0, 0xc15756f4c0000000, 0x0, + 0x4156336900000000); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c new file mode 100644 index 000000000..f7262f26b --- 
/dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c @@ -0,0 +1,355 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.8057, -0.8564, 0.3425, -0.3066, -0.7314, -0.6396, 0.7588, + // -0.3743, 0.8706, -0.3064, 0.0390, 0.6123, 0.0237, -0.6201, + // -0.4524, 0.3337 + VLOAD_16(v4, 0xba72, 0xbada, 0x357b, 0xb4e8, 0xb9da, 0xb91e, 0x3a12, 0xb5fd, + 0x3af7, 0xb4e7, 0x28fe, 0x38e6, 0x2612, 0xb8f6, 0xb73d, 0x3557); + // -0.4094, 0.0410, -0.7305, 0.9038, -0.3545, 0.2830, -0.7051, + // -0.7124, -0.6348, 0.1256, 0.5576, 0.1334, 0.8779, -0.4836, + // 0.3215, 0.4167 + VLOAD_16(v6, 0xb68d, 0x293e, 0xb9d8, 0x3b3b, 0xb5ac, 0x3487, 0xb9a4, 0xb9b3, + 0xb914, 0x3005, 0x3876, 0x3045, 0x3b06, 0xb7bd, 0x3525, 0x36ab); + asm volatile("vfdiv.vv v2, v4, v6"); + // 1.9678, -20.9062, -0.4690, -0.3394, 2.0625, -2.2598, + // -1.0762, 0.5254, -1.3711, -2.4395, 0.0699, 4.5898, 0.0270, + // 1.2822, -1.4072, 0.8008 + VCMP_U16(1, v2, 0x3fdf, 0xcd3a, 0xb780, 0xb56d, 0x4020, 0xc085, 0xbc4e, + 0x3833, 0xbd7c, 0xc0e0, 0x2c79, 0x4496, 0x26ea, 0x3d20, 0xbda0, + 0x3a68); + + VSET(16, e32, m4); + // 0.64838839, 0.00666664, -0.13619921, 0.21094505, + // -0.51040554, -0.77216595, 0.42111391, 0.82974166, + // -0.31227046, 0.68854737, -0.72970057, 0.10843290, + // -0.38442346, 0.18102080, 0.57249051, 0.76465768 + VLOAD_32(v8, 0x3f25fcc8, 0x3bda73da, 0xbe0b77ce, 0x3e5801fb, 0xbf02a9f0, + 0xbf45acab, 0x3ed79c3e, 0x3f5469f3, 0xbe9fe1ea, 0x3f3044a4, + 0xbf3acda8, 0x3dde1212, 0xbec4d327, 0x3e395d84, 0x3f128ebd, + 0x3f43c09b); + // -0.59629226, -0.46890569, 0.99662799, -0.49397555, + // 0.80701596, 0.55786854, -0.26524273, -0.04642257, + // -0.67671824, 0.64403933, 
0.06642481, 0.26544699, + // -0.00225505, 0.27478188, 0.76509053, 0.36194146 + VLOAD_32(v12, 0xbf18a69c, 0xbef01468, 0x3f7f2303, 0xbefcea5d, 0x3f4e9899, + 0x3f0ed079, 0xbe87cde5, 0xbd3e2597, 0xbf2d3d68, 0x3f24dfc3, + 0x3d8809bb, 0x3e87e8ab, 0xbb13c97d, 0x3e8cb036, 0x3f43dcf9, + 0x3eb95064); + asm volatile("vfdiv.vv v4, v8, v12"); + // -1.08736682, -0.01421745, -0.13666002, -0.42703542, + // -0.63246030, -1.38413608, -1.58765483, -17.87367058, + // 0.46144828, 1.06910765, -10.98536205, 0.40849173, + // 170.47213745, 0.65877998, 0.74826509, 2.11265564 + VCMP_U32(2, v4, 0xbf8b2ed5, 0xbc68f04d, 0xbe0bf09b, 0xbedaa462, 0xbf21e8ea, + 0xbfb12b5e, 0xbfcb3846, 0xc18efd46, 0x3eec42f2, 0x3f88d884, + 0xc12fc40a, 0x3ed125d4, 0x432a78dd, 0x3f28a5cd, 0x3f3f8e4c, + 0x400735c0); + + VSET(16, e64, m8); + // -0.6201645522687720, 0.7701971477336478, 0.3292637140913006, + // -0.8434179184761514, -0.7347451981263740, 0.6543864439701519, + // 0.1228421097534835, -0.5052233099528094, -0.5128552707464591, + // 0.9434287237802566, -0.5723896115412233, -0.5719579148082712, + // -0.6537028651114556, 0.1091378410914579, -0.7602559429758879, + // 0.2908894437497427 + VLOAD_64(v16, 0xbfe3d86354c44060, 0x3fe8a5747d1fa1c6, 0x3fd512a81cf2063c, + 0xbfeafd479316e516, 0xbfe783085c9b10ee, 0x3fe4f0bbd6f98570, + 0x3fbf72949bf67da0, 0xbfe02aca132d92f2, 0xbfe0694f74edfe18, + 0x3fee30916f57c874, 0xbfe2510404c47868, 0xbfe24d7aaf5946b0, + 0xbfe4eb22455e9102, 0x3fbbf075223e6d60, 0xbfe854044575797c, + 0x3fd29deec1ea08a0); + // 0.6660375425590812, -0.9603615652916235, -0.1168804546788573, + // -0.3258082002843947, 0.0488865860405421, + // -0.1515621417461690, -0.1189568642850463, + // -0.1213016259965920, -0.1369814061459547, 0.5914369694708146, + // 0.7538814889966272, 0.2346701936201294, 0.9227364529293489, + // 0.9447507336323382, -0.4250995717346850, -0.0882167932097473 + VLOAD_64(v24, 0x3fe5502df6e661fe, 0xbfeebb482d68699c, 0xbfbdebe0a2632640, + 0xbfd4da0aa33f5db0, 0x3fa907a9a083b220, 0xbfc36663650e4608, + 
0xbfbe73f501bd2e10, 0xbfbf0d9f949b6370, 0xbfc1889b51c74ac0, + 0x3fe2ed0d3930b850, 0x3fe81fcc12899c0a, 0x3fce09ac4378e388, + 0x3fed870e9905133a, 0x3fee3b65e3fa5532, 0xbfdb34d4d5893894, + 0xbfb6956031cb3a60); + asm volatile("vfdiv.vv v8, v16, v24"); + // -0.9311255186696326, -0.8019866429158581, -2.8170981623573508, + // 2.5886945685834193, -15.0295870019854370, -4.3176114854994303, + // -1.0326609606918302, 4.1650167984310764, 3.7439772679805032, + // 1.5951466892987514, -0.7592567530780479, -2.4372840282141826, + // -0.7084394065458121, 0.1155202501636058, 1.7884185106880846, + // -3.2974384260161655 + VCMP_U64(3, v8, 0xbfedcbc7be65070a, 0xbfe9a9dfe464e0ca, 0xc006896ac2e79279, + 0x4004b5a57f7b305c, 0xc02e0f26070bd40a, 0xc011453bf1fc3753, + 0xbff085c77fe07008, 0x4010a8fa29e23558, 0x400df3aa5a978cad, + 0x3ff985b888edc5e0, 0xbfe84bd4d177987a, 0xc0037f8ec4c1f1c6, + 0xbfe6ab891e49fb2d, 0x3fbd92bc307a7a1b, 0x3ffc9d5cba6f762a, + 0xc00a612765c28153); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.8057, -0.8564, 0.3425, -0.3066, -0.7314, -0.6396, 0.7588, + // -0.3743, 0.8706, -0.3064, 0.0390, 0.6123, 0.0237, -0.6201, + // -0.4524, 0.3337 + VLOAD_16(v4, 0xba72, 0xbada, 0x357b, 0xb4e8, 0xb9da, 0xb91e, 0x3a12, 0xb5fd, + 0x3af7, 0xb4e7, 0x28fe, 0x38e6, 0x2612, 0xb8f6, 0xb73d, 0x3557); + // -0.4094, 0.0410, -0.7305, 0.9038, -0.3545, 0.2830, -0.7051, + // -0.7124, -0.6348, 0.1256, 0.5576, 0.1334, 0.8779, -0.4836, + // 0.3215, 0.4167 + VLOAD_16(v6, 0xb68d, 0x293e, 0xb9d8, 0x3b3b, 0xb5ac, 0x3487, 0xb9a4, 0xb9b3, + 0xb914, 0x3005, 0x3876, 0x3045, 0x3b06, 0xb7bd, 0x3525, 0x36ab); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfdiv.vv v2, v4, v6, v0.t"); + // 0.0000, -20.9062, 0.0000, -0.3394, 0.0000, -2.2598, 0.0000, + // 0.5254, 0.0000, -2.4395, 0.0000, 4.5898, 0.0000, 1.2822, + // 0.0000, 0.8008 + VCMP_U16(4, v2, 0x0, 0xcd3a, 0x0, 0xb56d, 0x0, 0xc085, 
0x0, 0x3833, 0x0, + 0xc0e0, 0x0, 0x4496, 0x0, 0x3d20, 0x0, 0x3a68); + + VSET(16, e32, m4); + // 0.64838839, 0.00666664, -0.13619921, 0.21094505, + // -0.51040554, -0.77216595, 0.42111391, 0.82974166, + // -0.31227046, 0.68854737, -0.72970057, 0.10843290, + // -0.38442346, 0.18102080, 0.57249051, 0.76465768 + VLOAD_32(v8, 0x3f25fcc8, 0x3bda73da, 0xbe0b77ce, 0x3e5801fb, 0xbf02a9f0, + 0xbf45acab, 0x3ed79c3e, 0x3f5469f3, 0xbe9fe1ea, 0x3f3044a4, + 0xbf3acda8, 0x3dde1212, 0xbec4d327, 0x3e395d84, 0x3f128ebd, + 0x3f43c09b); + // -0.59629226, -0.46890569, 0.99662799, -0.49397555, + // 0.80701596, 0.55786854, -0.26524273, -0.04642257, + // -0.67671824, 0.64403933, 0.06642481, 0.26544699, + // -0.00225505, 0.27478188, 0.76509053, 0.36194146 + VLOAD_32(v12, 0xbf18a69c, 0xbef01468, 0x3f7f2303, 0xbefcea5d, 0x3f4e9899, + 0x3f0ed079, 0xbe87cde5, 0xbd3e2597, 0xbf2d3d68, 0x3f24dfc3, + 0x3d8809bb, 0x3e87e8ab, 0xbb13c97d, 0x3e8cb036, 0x3f43dcf9, + 0x3eb95064); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfdiv.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.01421745, 0.00000000, -0.42703542, + // 0.00000000, -1.38413608, 0.00000000, -17.87367058, + // 0.00000000, 1.06910765, 0.00000000, 0.40849173, + // 0.00000000, 0.65877998, 0.00000000, 2.11265564 + VCMP_U32(5, v4, 0x0, 0xbc68f04d, 0x0, 0xbedaa462, 0x0, 0xbfb12b5e, 0x0, + 0xc18efd46, 0x0, 0x3f88d884, 0x0, 0x3ed125d4, 0x0, 0x3f28a5cd, 0x0, + 0x400735c0); + + VSET(16, e64, m8); + // -0.6201645522687720, 0.7701971477336478, 0.3292637140913006, + // -0.8434179184761514, -0.7347451981263740, 0.6543864439701519, + // 0.1228421097534835, -0.5052233099528094, -0.5128552707464591, + // 0.9434287237802566, -0.5723896115412233, -0.5719579148082712, + // -0.6537028651114556, 0.1091378410914579, -0.7602559429758879, + // 0.2908894437497427 + VLOAD_64(v16, 0xbfe3d86354c44060, 0x3fe8a5747d1fa1c6, 0x3fd512a81cf2063c, + 0xbfeafd479316e516, 0xbfe783085c9b10ee, 0x3fe4f0bbd6f98570, + 0x3fbf72949bf67da0, 0xbfe02aca132d92f2, 0xbfe0694f74edfe18, + 
0x3fee30916f57c874, 0xbfe2510404c47868, 0xbfe24d7aaf5946b0, + 0xbfe4eb22455e9102, 0x3fbbf075223e6d60, 0xbfe854044575797c, + 0x3fd29deec1ea08a0); + // 0.6660375425590812, -0.9603615652916235, -0.1168804546788573, + // -0.3258082002843947, 0.0488865860405421, + // -0.1515621417461690, -0.1189568642850463, + // -0.1213016259965920, -0.1369814061459547, 0.5914369694708146, + // 0.7538814889966272, 0.2346701936201294, 0.9227364529293489, + // 0.9447507336323382, -0.4250995717346850, -0.0882167932097473 + VLOAD_64(v24, 0x3fe5502df6e661fe, 0xbfeebb482d68699c, 0xbfbdebe0a2632640, + 0xbfd4da0aa33f5db0, 0x3fa907a9a083b220, 0xbfc36663650e4608, + 0xbfbe73f501bd2e10, 0xbfbf0d9f949b6370, 0xbfc1889b51c74ac0, + 0x3fe2ed0d3930b850, 0x3fe81fcc12899c0a, 0x3fce09ac4378e388, + 0x3fed870e9905133a, 0x3fee3b65e3fa5532, 0xbfdb34d4d5893894, + 0xbfb6956031cb3a60); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfdiv.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.8019866429158581, + // 0.0000000000000000, 2.5886945685834193, 0.0000000000000000, + // -4.3176114854994303, 0.0000000000000000, 4.1650167984310764, + // 0.0000000000000000, 1.5951466892987514, 0.0000000000000000, + // -2.4372840282141826, 0.0000000000000000, 0.1155202501636058, + // 0.0000000000000000, -3.2974384260161655 + VCMP_U64(6, v8, 0x0, 0xbfe9a9dfe464e0ca, 0x0, 0x4004b5a57f7b305c, 0x0, + 0xc011453bf1fc3753, 0x0, 0x4010a8fa29e23558, 0x0, 0x3ff985b888edc5e0, + 0x0, 0xc0037f8ec4c1f1c6, 0x0, 0x3fbd92bc307a7a1b, 0x0, + 0xc00a612765c28153); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -0.0933, 0.4983, 0.5918, -0.0608, 0.0790, -0.2864, -0.7656, + // 0.4878, 0.8862, 0.4255, 0.9561, -0.7158, -0.3247, 0.9961, + // -0.4963, -0.4114 + VLOAD_16(v4, 0xadf9, 0x37f9, 0x38bc, 0xabc7, 0x2d0f, 0xb495, 0xba20, 0x37ce, + 0x3b17, 0x36cf, 0x3ba6, 0xb9ba, 0xb532, 0x3bf8, 0xb7f1, 0xb695); + double dscalar_16; + // -0.3206 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb521); 
+ asm volatile("vfdiv.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.2910, -1.5547, -1.8457, 0.1896, -0.2466, 0.8936, 2.3887, + // -1.5215, -2.7656, -1.3271, -2.9824, 2.2324, 1.0127, + // -3.1074, 1.5488, 1.2832 + VCMP_U16(7, v2, 0x34a8, 0xbe37, 0xbf62, 0x3210, 0xb3e3, 0x3b25, 0x40c6, + 0xbe16, 0xc187, 0xbd4f, 0xc1f7, 0x4077, 0x3c0d, 0xc236, 0x3e31, + 0x3d22); + + VSET(16, e32, m4); + // 0.74354362, 0.49774653, 0.25714639, 0.51635689, + // 0.74569613, 0.41876560, 0.21346331, 0.08743033, + // -0.15111920, -0.93289024, 0.08753468, -0.33427054, + // 0.06167563, -0.54564798, 0.78990245, -0.77273035 + VLOAD_32(v8, 0x3f3e58e0, 0x3efed8a2, 0x3e83a8b1, 0x3f042ff7, 0x3f3ee5f1, + 0x3ed66872, 0x3e5a9620, 0x3db30eac, 0xbe1abefe, 0xbf6ed1e5, + 0x3db34562, 0xbeab2582, 0x3d7c9f95, 0xbf0baf96, 0x3f4a370c, + 0xbf45d1a8); + double dscalar_32; + // -0.45971388 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbeeb5f9e); + asm volatile("vfdiv.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1.61740518, -1.08273113, -0.55936182, -1.12321365, + // -1.62208748, -0.91092664, -0.46433949, -0.19018422, + // 0.32872447, 2.02928448, -0.19041122, 0.72712737, + // -0.13416091, 1.18692958, -1.71824801, 1.68089414 + VCMP_U32(8, v4, 0xbfcf0722, 0xbf8a96ef, 0xbf0f3255, 0xbf8fc576, 0xbfcfa090, + 0xbf69327c, 0xbeedbde7, 0xbe42bfa7, 0x3ea84e93, 0x4001dfcc, + 0xbe42fb28, 0x3f3a2504, 0xbe096179, 0x3f97ed4e, 0xbfdbef8c, + 0x3fd72789); + + VSET(16, e64, m8); + // -0.8580137874650531, -0.4775160339931992, 0.3831482495481682, + // -0.3582952848420831, 0.0009796501269754, 0.5485795361059773, + // -0.8055070333165963, -0.2632019868496005, + // -0.0782680309690278, -0.7119901734202212, + // -0.5084969452651125, -0.7586325258910223, 0.6253847342253853, + // 0.5751160060426936, 0.0609762717873976, -0.7366654110036495 + VLOAD_64(v16, 0xbfeb74d95495be72, 0xbfde8f9f69544024, 0x3fd885803c550ed0, + 0xbfd6ee4f58ad4bcc, 0x3f500cf35070c000, 0x3fe18df6abda8f8e, + 0xbfe9c6b6af995e52, 0xbfd0d84d2570b86c, 0xbfb4095fa9559400, + 
0xbfe6c89f9dbd3916, 0xbfe0459b62c0f228, 0xbfe846b7b80c4834, + 0x3fe40326d89d4d44, 0x3fe26759aeab8116, 0x3faf38482a5158c0, + 0xbfe792c3570b5cc2); + double dscalar_64; + // 0.0072652319849018 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7dc225dc5e3c00); + asm volatile("vfdiv.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -118.0986084474836275, + // -65.7261922242188206, 52.7372354171766275, + // -49.3164272781202087, + // 0.1348408597290880, 75.5075044053657223, + // -110.8714814599939871, -36.2276094413188474, + // -10.7729568899768022, -97.9996474854262800, + // -69.9904622896889919, + // -104.4195873535172723, 86.0791142698573424, 79.1600333255520070, + // 8.3928870976336363, -101.3959929338175954 + VCMP_U64(9, v8, 0xc05d864f99ce434b, 0xc0506e79eef36846, 0x404a5e5dbaeb1cfe, + 0xc048a880b0658b57, 0x3fc142771d59b8f1, 0x4052e07af3c1e7c9, + 0xc05bb7c65a2c6fa6, 0xc0421d224e615cd6, 0xc0258bc101675622, + 0xc0587ffa39725bce, 0xc0517f63bbf188ac, 0xc05a1ada84ea4b00, + 0x40558510354c6bc9, 0x4053ca3dfc6ae106, 0x4020c9287f66b6b2, + 0xc0595957f2bf0c64); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -0.0933, 0.4983, 0.5918, -0.0608, 0.0790, -0.2864, + // -0.7656, 0.4878, 0.8862, 0.4255, 0.9561, -0.7158, + // -0.3247, 0.9961, -0.4963, -0.4114 + VLOAD_16(v4, 0xadf9, 0x37f9, 0x38bc, 0xabc7, 0x2d0f, 0xb495, 0xba20, 0x37ce, + 0x3b17, 0x36cf, 0x3ba6, 0xb9ba, 0xb532, 0x3bf8, 0xb7f1, 0xb695); + double dscalar_16; + // -0.3206 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb521); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfdiv.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, -1.5547, 0.0000, 0.1896, 0.0000, 0.8936, 0.0000, + // -1.5215, 0.0000, -1.3271, 0.0000, 2.2324, 0.0000, + // -3.1074, 0.0000, 1.2832 + VCMP_U16(10, v2, 0x0, 0xbe37, 0x0, 0x3210, 0x0, 0x3b25, 0x0, 0xbe16, 0x0, + 0xbd4f, 0x0, 0x4077, 0x0, 0xc236, 0x0, 0x3d22); + + VSET(16, e32, m4); + // 0.74354362, 0.49774653, 0.25714639, 
0.51635689, + // 0.74569613, 0.41876560, 0.21346331, 0.08743033, + // -0.15111920, -0.93289024, 0.08753468, -0.33427054, + // 0.06167563, -0.54564798, 0.78990245, -0.77273035 + VLOAD_32(v8, 0x3f3e58e0, 0x3efed8a2, 0x3e83a8b1, 0x3f042ff7, 0x3f3ee5f1, + 0x3ed66872, 0x3e5a9620, 0x3db30eac, 0xbe1abefe, 0xbf6ed1e5, + 0x3db34562, 0xbeab2582, 0x3d7c9f95, 0xbf0baf96, 0x3f4a370c, + 0xbf45d1a8); + double dscalar_32; + // -0.45971388 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbeeb5f9e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfdiv.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -1.08273113, 0.00000000, -1.12321365, + // 0.00000000, -0.91092664, 0.00000000, -0.19018422, + // 0.00000000, 2.02928448, 0.00000000, 0.72712737, + // 0.00000000, 1.18692958, 0.00000000, 1.68089414 + VCMP_U32(11, v4, 0x0, 0xbf8a96ef, 0x0, 0xbf8fc576, 0x0, 0xbf69327c, 0x0, + 0xbe42bfa7, 0x0, 0x4001dfcc, 0x0, 0x3f3a2504, 0x0, 0x3f97ed4e, 0x0, + 0x3fd72789); + + VSET(16, e64, m8); + // -0.8580137874650531, -0.4775160339931992, + // 0.3831482495481682, -0.3582952848420831, 0.0009796501269754, + // 0.5485795361059773, -0.8055070333165963, + // -0.2632019868496005, -0.0782680309690278, + // -0.7119901734202212, -0.5084969452651125, + // -0.7586325258910223, 0.6253847342253853, + // 0.5751160060426936, 0.0609762717873976, -0.7366654110036495 + VLOAD_64(v16, 0xbfeb74d95495be72, 0xbfde8f9f69544024, 0x3fd885803c550ed0, + 0xbfd6ee4f58ad4bcc, 0x3f500cf35070c000, 0x3fe18df6abda8f8e, + 0xbfe9c6b6af995e52, 0xbfd0d84d2570b86c, 0xbfb4095fa9559400, + 0xbfe6c89f9dbd3916, 0xbfe0459b62c0f228, 0xbfe846b7b80c4834, + 0x3fe40326d89d4d44, 0x3fe26759aeab8116, 0x3faf38482a5158c0, + 0xbfe792c3570b5cc2); + double dscalar_64; + // 0.0072652319849018 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7dc225dc5e3c00); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfdiv.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -65.7261922242188206, + // 0.0000000000000000, -49.3164272781202087, 
+ // 0.0000000000000000, 75.5075044053657223, + // 0.0000000000000000, -36.2276094413188474, + // 0.0000000000000000, -97.9996474854262800, + // 0.0000000000000000, -104.4195873535172723, + // 0.0000000000000000, 79.1600333255520070, + // 0.0000000000000000, -101.3959929338175954 + VCMP_U64(12, v8, 0x0, 0xc0506e79eef36846, 0x0, 0xc048a880b0658b57, 0x0, + 0x4052e07af3c1e7c9, 0x0, 0xc0421d224e615cd6, 0x0, 0xc0587ffa39725bce, + 0x0, 0xc05a1ada84ea4b00, 0x0, 0x4053ca3dfc6ae106, 0x0, + 0xc0595957f2bf0c64); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... + CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c new file mode 100644 index 000000000..fd9615af4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c @@ -0,0 +1,48 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+// Masked vfirst.m: v2 is loaded with 3, v0 (mask) with 2; expected index 1.
+void TEST_CASE1(void) {
+  VSET(4, e32, m1);
+  VLOAD_32(v2, 3);
+  VLOAD_32(v0, 2, 0, 0, 0);
+  volatile uint32_t scalar = 1337; // seed value; presumably a sentinel
+  volatile uint32_t OUP[] = {0};
+  // vfirst.m writes %[A], so it must be a read-write early-clobber operand
+  // ("+&r"), not an input; "memory" tells the compiler that sw updates OUP.
+  __asm__ volatile("vfirst.m %[A], v2, v0.t \n"
+                   "sw %[A], (%1) \n"
+                   : [A] "+&r"(scalar) : "r"(OUP) : "memory");
+  XCMP(1, OUP[0], 1);
+}
+
+// Masked vfirst.m with an all-zero mask in v0; the expected result is -1.
+void TEST_CASE2(void) {
+  VSET(4, e32, m1);
+  VLOAD_32(v2, 1, 2, 3, 4);
+  VLOAD_32(v0, 0, 0, 0, 0);
+  volatile int32_t scalar = 1337; // seed value; presumably a sentinel
+  volatile int32_t OUP[] = {0};
+  // vfirst.m writes %[A], so it must be a read-write early-clobber operand
+  // ("+&r"), not an input; "memory" tells the compiler that sw updates OUP.
+  __asm__ volatile("vfirst.m %[A], v2, v0.t \n"
+                   "sw %[A], (%1) \n"
+                   : [A] "+&r"(scalar) : "r"(OUP) : "memory");
+  XCMP(2, OUP[0], -1);
+}
+
+// Runs both vfirst.m cases between INIT_CHECK() and EXIT_CHECK().
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+  TEST_CASE1();
+  TEST_CASE2();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c
new file mode 100644
index 000000000..70503aa4c
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c
@@ -0,0 +1,356 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.6353, -0.2290, 0.6870, -0.1031, 0.5410, 0.4211, -0.4939, + // -0.8779, -0.3213, -0.6846, 0.9229, 0.0103, -0.5068, 0.8706, + // 0.6309, -0.3054 + VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); + // -0.8042, -0.9463, 0.4431, 0.3757, -0.5259, -0.1290, 0.4697, + // 0.0952, -0.9995, 0.8823, -0.6128, -0.5010, -0.9976, 0.0081, + // 0.9746, -0.7734 + VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); + // 0.6509, 0.3452, 0.9360, 0.3616, -0.4258, -0.0945, -0.7295, + // -0.7734, 0.3411, -0.1519, -0.3557, 0.6060, 0.2598, + // -0.0171, -0.8042, -0.4419 + VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); + asm volatile("vfmacc.vv v2, v4, v6"); + // 1.1621, 0.5620, 1.2402, 0.3228, -0.7100, -0.1489, -0.9614, + // -0.8569, 0.6621, -0.7559, -0.9209, 0.6006, 0.7651, + // -0.0100, -0.1895, -0.2057 + VCMP_U16(1, v2, 0x3ca6, 0x387f, 0x3cf6, 0x352a, 0xb9af, 0xb0c4, 0xbbb1, + 0xbadb, 0x394c, 0xba0c, 0xbb5f, 0x38ce, 0x3a1f, 0xa123, 0xb20f, + 0xb295); + + VSET(16, e32, m4); + // 0.72754014, 0.34003398, 0.70107144, -0.41727209, + // -0.52331781, -0.11821542, -0.16069038, 0.30835113, + // -0.59407759, -0.53240144, -0.92390168, 0.33251825, + // -0.45979658, 0.32465541, -0.99342769, -0.16221718 + VLOAD_32(v8, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, + 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, + 0xbe261c43); + // 0.95104939, -0.11575679, 0.13276713, 0.22784369, + // 0.93318671, 
-0.32301557, 0.41414812, 0.81797487, + // -0.21847244, -0.00211347, -0.72070456, -0.58624452, + // 0.07381243, -0.16745377, 0.55389816, -0.23427610 + VLOAD_32(v12, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, + 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, + 0xbe6fe613); + // -0.07459558, -0.00461283, -0.97654468, 0.94394064, + // 0.24971253, 0.97819000, 0.55116856, -0.97427863, 0.61764765, + // 0.86367106, 0.48787504, -0.26353455, -0.22228357, 0.40454853, + // 0.64000225, -0.51787829 + VLOAD_32(v4, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, + 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, + 0xbf0493ac); + asm volatile("vfmacc.vv v4, v8, v12"); + // 0.61733103, -0.04397407, -0.88346541, 0.84886783, + // -0.23864070, 1.01637542, 0.48461893, -0.72205520, + // 0.74743724, 0.86479628, 1.15373516, -0.45847154, + // -0.25622228, 0.35018376, 0.08974451, -0.47987467 + VCMP_U32(2, v4, 0x3f1e0968, 0xbd341e29, 0xbf622aca, 0x3f594f67, 0xbe745e3a, + 0x3f821897, 0x3ef81ff9, 0xbf38d89b, 0x3f3f580c, 0x3f5d634a, + 0x3f93ad98, 0xbeeabcc8, 0xbe832f91, 0x3eb34b49, 0x3db7cbf5, + 0xbef5b222); + + VSET(16, e64, m8); + // -0.8992497708533775, 0.5795977429472710, -0.9421852470430045, + // 0.3407052467776674, -0.1137141395145149, 0.3284679540868891, + // 0.9781857174570949, 0.6033619236526551, -0.1287683269222892, + // 0.6555379481826638, 0.6785468173738887, 0.6923267883951645, + // 0.2185923779321672, -0.1310544396012536, -0.7596952716763763, + // -0.4011231994121780, + VLOAD_64(v16, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, + 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, + 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, + 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, + 0x3fcbfad5c52fcfd8, 0xbfc0c664520a9f78, 0xbfe84f6c7558d3f0, + 0xbfd9ac00a3c919a8); + 
// 0.3028184794479449, 0.5016121947684244, 0.1900289524299839, + // 0.3294240614689632, 0.5945396967575391, -0.8758223026547887, + // 0.3719808177193829, 0.9159354723876536, 0.0805670751146079, + // 0.1775335284298603, -0.7021940272509897, 0.9279338928738479, + // -0.7358371767028979, 0.2529700403354449, + // -0.8333759771774525, -0.4016540133317048, + VLOAD_64(v24, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, + 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, + 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, + 0xbfe78bfa6823d662, 0x3fd030a94086f244, 0xbfeaab0418e7f974, + 0xbfd9b4b308e446c8); + // -0.0664052564688480, -0.6742544994800144, 0.4321518669568931, + // -0.1627512425330113, 0.0193121553139675, -0.3517684494272582, + // -0.4834881433176264, 0.8328623424117183, 0.0264604353835154, + // 0.0322804237161178, -0.8345203693668675, 0.7175251091228996, + // -0.7419013213335950, -0.2977694001417877, 0.4556506623709609, + // -0.7832443836668095, + VLOAD_64(v8, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, + 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, + 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, + 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, + 0xbfe910568693fcea); + asm volatile("vfmacc.vv v8, v16, v24"); + // -0.3387147047225807, -0.3835212035574087, 0.2531093914663254, + // -0.0505147363757267, -0.0482954147100367, -0.6394480093239447, + // -0.1196218202565150, 1.3855029309732363, 0.0160859479159849, + // 0.1486603886766570, -1.3109918917369803, 1.3599586010192732, + // -0.9027497195599737, -0.3309222470138559, 1.0887624517613512, + // -0.6221316407824544, + VCMP_U64(3, v8, 0xbfd5ad8070dd4c48, 0xbfd88b9c84a68118, 0x3fd032f1bbaa2211, + 0xbfa9dd1149664d37, 0xbfa8ba2d3573e621, 0xbfe4765babf13c96, + 0xbfbe9f891de3c4d6, 0x3ff62b051f10acd5, 
0x3f9078d5b0e5b2ba, + 0x3fc3074db9c9d78e, 0xbff4f9d2a2454dd5, 0x3ff5c263f334aac4, + 0xbfece353613f76db, 0xbfd52dd4811c5fc3, 0x3ff16b922d36d831, + 0xbfe3e8809d5ef572); +}; + +// Simple random test with similar values (masked, the numbers are taken from +// TEST_CASE1) +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); + VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); + VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0x3935, 0x387f, 0x3b7d, 0x352a, 0xb6d0, 0xb0c4, 0xb9d6, + 0xbadb, 0x3575, 0xba0c, 0xb5b1, 0x38ce, 0x3428, 0xa123, 0xba6f, + 0xb295); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, + 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, + 0xbe261c43); + VLOAD_32(v12, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, + 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, + 0xbe6fe613); + VLOAD_32(v4, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, + 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, + 0xbf0493ac); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xbd98c591, 0xbd341e29, 0xbf79fed5, 0x3f594f67, 0x3e7fb4a4, + 0x3f821897, 0x3f0d1962, 0xbf38d89b, 0x3f1e1e28, 0x3f5d634a, + 0x3ef9cac2, 0xbeeabcc8, 0xbe639e4e, 0x3eb34b49, 0x3f23d730, + 0xbef5b222); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xbfecc6a774980626, 
0x3fe28c1090d967fc, 0xbfee2661acda592c, + 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, + 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, + 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, + 0x3fcbfad5c52fcfd8, 0xbfc0c664520a9f78, 0xbfe84f6c7558d3f0, + 0xbfd9ac00a3c919a8); + VLOAD_64(v16, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, + 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, + 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, + 0xbfe78bfa6823d662, 0x3fd030a94086f244, 0xbfeaab0418e7f974, + 0xbfd9b4b308e446c8); + VLOAD_64(v8, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, + 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, + 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, + 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, + 0xbfe910568693fcea); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xbfb0ffef54d0f220, 0xbfd88b9c84a68118, 0x3fdba8604ddf0d80, + 0xbfa9dd1149664d37, 0x3f93c690cdf47e40, 0xbfe4765babf13c96, + 0xbfdef17840e363cc, 0x3ff62b051f10acd5, 0x3f9b1871c270c340, + 0x3fc3074db9c9d78e, 0xbfeab4640fc8d962, 0x3ff5c263f334aac4, + 0xbfe7bda7d6ff9552, 0xbfd52dd4811c5fc3, 0x3fdd296165522d4c, + 0xbfe3e8809d5ef572); +}; + +// Simple random test with similar values (with scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 0.7407, -0.1365, 0.0000, -0.8525, -0.0812, 0.9609, -0.3740, + // 0.2800, 0.9692, 0.4045, 0.0205, -0.5503, 0.6499, 0.4470, + // -0.9360, -0.4426 + VLOAD_16(v4, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); + double dscalar_16; + // 0.5757 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x389b); + // -0.1472, -0.8906, 0.2247, 0.6118, -0.0908, -0.6450, -0.5415, + // 0.0505, -0.4595, 
0.1157, -0.3494, 0.6670, -0.9658, -0.2944, + // -0.8096, -0.3364 + VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); + asm volatile("vfmacc.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // 0.2793, -0.9692, 0.2247, 0.1210, -0.1375, -0.0918, -0.7568, + // 0.2118, 0.0986, 0.3486, -0.3376, 0.3501, -0.5918, -0.0371, + // -1.3486, -0.5913 + VCMP_U16(7, v2, 0x3478, 0xbbc1, 0x3331, 0x2fbf, 0xb067, 0xade0, 0xba0e, + 0x32c6, 0x2e4e, 0x3594, 0xb567, 0x359a, 0xb8bc, 0xa8bf, 0xbd65, + 0xb8bb); + + VSET(16, e32, m4); + // -0.79164708, -0.13258822, -0.94492996, -0.93729085, + // 0.80344391, 0.77393818, 0.31253836, -0.42539355, + // -0.20085664, -0.63946086, 0.24876182, -0.45639724, + // 0.92842573, 0.39117134, -0.70563781, 0.13946204 + VLOAD_32(v8, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 0xbe4dad5d, 0xbf23b3b5, + 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, + 0x3e0ecf23); + double dscalar_32; + // 0.97630060 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f79eed6); + // -0.43768027, -0.74227923, 0.60234988, 0.43624315, + // 0.34759882, 0.65410614, 0.99296939, -0.31534156, + // -0.89647168, 0.47623411, -0.68185741, 0.77072626, + // 0.19827089, -0.16254151, 0.81625229, -0.24369264 + VLOAD_32(v4, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, + 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, + 0xbe798a90); + asm volatile("vfmacc.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -1.21056581, -0.87172520, -0.32018578, + // -0.47883448, 1.13200164, 1.40970242, 1.29810071, + // -0.73065352, -1.09256816, -0.14807191, -0.43899110, + // 0.32514536, 1.10469353, 0.21935931, 0.12733769, -0.10753576 + VCMP_U32(8, v4, 0xbf9af3d2, 0xbf5f2962, 0xbea3ef65, 0xbef529cb, 0x3f90e56e, + 0x3fb47121, 0x3fa6282b, 0xbf3b0c1c, 0xbf8bd946, 0xbe17a02a, + 0xbee0c371, 0x3ea67974, 
0x3f8d6699, 0x3e609fb9, 0x3e0264cf, + 0xbddc3bb6); + + VSET(16, e64, m8); + // -0.1981785436218435, 0.2324321764718080, 0.3529425082887112, + // -0.4889737836823891, 0.1335009259637479, -0.7964186221277452, + // -0.2707335519445100, 0.8070543770008602, -0.1237072120160827, + // -0.2357903062216291, -0.0812498320849093, 0.8656662449573254, + // 0.7178262144151533, -0.3106178959409680, -0.1410836751949509, + // 0.6904294937898030 + VLOAD_64(v16, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, + 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, + 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, + 0x3fe6f86eae63fc74, 0xbfd3e129e2279a3c, 0xbfc20f07a57b1c48, + 0x3fe617ff9800ac5a); + double dscalar_64; + // 0.8738839355493300 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3febf6db7175e482); + // -0.9433584234417285, -0.0696473591160720, -0.8171557896146857, + // -0.9495656113293445, -0.6353537919969880, 0.8159507202507001, + // 0.0288919190409849, -0.6024741558584952, -0.9583084411212592, + // 0.7665070398551490, -0.7817863527411446, -0.2155326059803253, + // -0.7807395886866346, 0.2528540140694266, -0.1740695080779533, + // 0.7247829241803623 + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, + 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, + 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, + 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, + 0x3fe7316bf581b994); + asm volatile("vfmacc.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -1.1165434690834197, 0.1334713860074080, -0.5087250014486948, + // -1.3768719457941574, -0.5186894774163083, 0.1199732804009315, + // -0.2076977828175325, 0.1027976993173292, -1.0664141864137089, + // 0.5604536790898100, -0.8527892757662274, 0.5409592190351925, + // -0.1534427913930433, -0.0185899752875187, -0.2973602653990804, + // 
1.3281381674327271 + VCMP_U64(9, v8, 0xbff1dd5caf44692a, 0x3fc1159722ed4311, 0xbfe04779a77c2679, + 0xbff607aae09f73e1, 0xbfe0991aacc90937, 0x3fbeb691a3b74133, + 0xbfca95d7485395ec, 0x3fba50f334ac0644, 0xbff11008526a327e, + 0x3fe1ef3c8dd3a2b9, 0xbfeb4a0cbc397482, 0x3fe14f89b5473a2f, + 0xbfc3a4036d6b8775, 0xbf9309401f92c802, 0xbfd307f359c13629, + 0x3ff5400dce9b1643); +}; + +// Simple random test with similar values (masked with scalar, values taken from +// TEST_CASE3) +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); + double dscalar_16; + BOX_HALF_IN_DOUBLE(dscalar_16, 0x389b); + VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(10, v2, 0xb0b6, 0xbbc1, 0x3331, 0x2fbf, 0xadcf, 0xade0, 0xb855, + 0x32c6, 0xb75a, 0x3594, 0xb597, 0x359a, 0xbbba, 0xa8bf, 0xba7a, + 0xb8bb); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 0xbe4dad5d, 0xbf23b3b5, + 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, + 0x3e0ecf23); + double dscalar_32; + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f79eed6); + VLOAD_32(v4, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, + 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, + 0xbe798a90); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(11, v4, 0xbee017a1, 0xbf5f2962, 0x3f1a339a, 0xbef529cb, 0x3eb1f879, + 0x3fb47121, 0x3f7e333e, 0xbf3b0c1c, 0xbf657f2b, 0xbe17a02a, + 0xbf2e8e35, 0x3ea67974, 0x3e4b0786, 0x3e609fb9, 0x3f50f5e9, + 0xbddc3bb6); + + VSET(16, e64, m8); + VLOAD_64(v16, 
0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, + 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, + 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, + 0x3fe6f86eae63fc74, 0xbfd3e129e2279a3c, 0xbfc20f07a57b1c48, + 0x3fe617ff9800ac5a); + double dscalar_64; + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3febf6db7175e482); + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, + 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, + 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, + 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, + 0x3fe7316bf581b994); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfee2ffe0122d3b6, 0x3fc1159722ed4311, 0xbfea2623e6043a6c, + 0xbff607aae09f73e1, 0xbfe454d179c08866, 0x3fbeb691a3b74133, + 0x3f9d95d7dd994d80, 0x3fba50f334ac0644, 0xbfeeaa7676c316f0, + 0x3fe1ef3c8dd3a2b9, 0xbfe90464d02f6f4c, 0x3fe14f89b5473a2f, + 0xbfe8fbd197034034, 0xbf9309401f92c802, 0xbfc647e8de367aa0, + 0x3ff5400dce9b1643); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c new file mode 100644 index 000000000..d7d816f0c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c @@ -0,0 +1,433 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, + // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, + // 0.5337, -0.5815 + VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); + // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, + // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, + // -0.9067, -0.2495 + VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); + // -0.6558, -0.1006, 0.4558, -0.0784, 0.1539, 0.6748, 0.3347, + // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, + // -0.6890, -0.2598 + VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); + asm volatile("vfmadd.vv v2, v4, v6"); + // -1.0400, 0.5332, -1.2412, -0.8071, 0.2429, -0.0466, 0.0254, + // -0.5898, -0.1761, 0.1954, 0.2058, 0.4761, -0.3496, 0.2847, + // -1.2744, -0.0984 + VCMP_U16(1, v2, 0xbc29, 0x3844, 0xbcf7, 0xba75, 0x33c6, 0xa9f7, 0x2684, + 0xb8b8, 0xb1a3, 0x3241, 0x3297, 0x379e, 0xb597, 0x348e, 0xbd19, + 0xae4d); + + VSET(16, e32, m4); + // -0.20637949, -0.63321692, 0.40850523, 0.58702314, + // -0.25534528, -0.22053087, 0.96057665, 0.85530519, + // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, + // -0.71307814, 0.78942811, 0.48685852 + VLOAD_32(v8, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, + 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, + 0x3ef94585); + // -0.15712014, 0.83088422, 0.57509524, 0.85365236, + // 
-0.96695948, 0.71368766, 0.23281342, -0.67807233, + // 0.79363507, 0.62817359, 0.37205252, 0.27726358, + // -0.85021532, -0.16634122, -0.58148408, 0.06963744 + VLOAD_32(v12, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, + 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, + 0x3d8e9e13); + // -0.63061494, 0.57643133, 0.08198822, -0.06029604, + // -0.84276563, 0.00681775, 0.30881208, 0.27571887, + // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, + // -0.83231467, 0.20959182, 0.15603130 + VLOAD_32(v4, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, + 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, + 0x3e1fc6ab); + asm volatile("vfmadd.vv v4, v8, v12"); + // -0.02697416, 0.46587816, 0.60858786, 0.81825721, + // -0.75176322, 0.71218413, 0.52945113, -0.44224855, + // 0.88533098, 0.36834168, 0.37753561, 0.55415976, + // -0.63396782, 0.42716417, -0.41602641, 0.14560261 + VCMP_U32(2, v4, 0xbcdcf8e5, 0x3eee8795, 0x3f1bcc6a, 0x3f51794e, 0xbf40738e, + 0x3f3651b3, 0x3f078a1b, 0xbee26e67, 0x3f62a50d, 0x3ebc9748, + 0x3ec14c59, 0x3f0ddd6a, 0xbf224bb7, 0x3edab544, 0xbed5016a, + 0x3e1518d9); + + VSET(16, e64, m8); + // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, + // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, + // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, + // -0.8054516243685412, 0.8701363884969631, + // -0.3585976675814562, 0.4150155349314333, + // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, + 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, + 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, + 0x3fda8f9d51773268, 0xbfe61b2f86f58c4a, 0x3feaeb90ae3f72fc, + 
0xbfd57ce0a6d3c3f8); + // 0.5881481456806663, -0.9882550591195853, -0.8483939717953815, + // -0.9684864200393222, -0.6743741213041285, 0.4372709704288931, + // -0.8339944484196176, -0.1519222509233684, + // -0.2540075520951832, 0.6661048539265222, 0.3013290199421905, + // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, + // -0.1242681642824526, -0.9006297759672148 + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, + 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, + 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, + 0xbfe6f8e079b1c412, 0x3fc4308d2bb5d1f0, 0xbfbfd009d586ef50, + 0xbfecd1f58932a7e4); + // -0.8344616273245185, 0.7077884806720691, -0.1882041492960900, + // -0.2751607560371576, 0.2338395078923734, -0.9938305657796487, + // -0.5345602642671559, 0.0887204597208056, 0.3045224871958914, + // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, + // -0.8471643748461517, 0.8077493118513845, 0.2789872574353331, + // 0.7073875082318823 + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 0xbfefcd75c2393d96, + 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, + 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, + 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, + 0x3fe6a2eb20ae8e42); + asm volatile("vfmadd.vv v8, v16, v24"); + // 0.5624246503668447, -0.5731100645801621, -0.9339028680308291, + // -0.9694950747455855, -0.7469464057195535, + // -0.5225882703620045, -0.6532258740745487, + // -0.1696993409682697, 0.0197439149088051, 0.9839880198025914, + // 0.8946349503834604, -0.3853314870073767, -1.0694668518985466, + // -0.4002765447811873, 0.1104314039662806, -1.1381329534609521 + VCMP_U64(3, v8, 0x3fe1ff61faf9464f, 0xbfe256eaeb0c2af6, 0xbfede288447aa80f, + 0xbfef061a88f54aac, 0xbfe7e6fc260dc471, 0xbfe0b90b094f4be7, + 0xbfe4e739f2c1a370, 
0xbfc5b8b53fce44b1, 0x3f9437bfb3503463, + 0x3fef7cd47196a75e, 0x3feca0d979b82d6d, 0xbfd8a94565b434f4, + 0xbff11c894610f720, 0xbfd99e21834c3aa4, 0x3fbc453b847c8ddf, + 0xbff235cae659d5ba); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, + // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, + // 0.5337, -0.5815 + VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); + // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, + // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, + // -0.9067, -0.2495 + VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.6558, -0.1006, 0.4558, -0.0784, 0.1539, 0.6748, 0.3347, + // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, + // -0.6890, -0.2598 + VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); + asm volatile("vfmadd.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0xb93f, 0x3844, 0x374b, 0xba75, 0x30ed, 0xa9f7, 0x355b, + 0xb8b8, 0x2bdc, 0x3241, 0xad4f, 0x379e, 0xb914, 0x348e, 0xb983, + 0xae4d); + + VSET(16, e32, m4); + // -0.20637949, -0.63321692, 0.40850523, 0.58702314, + // -0.25534528, -0.22053087, 0.96057665, 0.85530519, + // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, + // -0.71307814, 0.78942811, 0.48685852 + VLOAD_32(v8, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, + 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, + 0x3ef94585); + // -0.15712014, 0.83088422, 0.57509524, 0.85365236, + // -0.96695948, 
0.71368766, 0.23281342, -0.67807233, + // 0.79363507, 0.62817359, 0.37205252, 0.27726358, + // -0.85021532, -0.16634122, -0.58148408, 0.06963744 + VLOAD_32(v12, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, + 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, + 0x3d8e9e13); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.63061494, 0.57643133, 0.08198822, -0.06029604, + // -0.84276563, 0.00681775, 0.30881208, 0.27571887, + // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, + // -0.83231467, 0.20959182, 0.15603130 + VLOAD_32(v4, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, + 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, + 0x3e1fc6ab); + asm volatile("vfmadd.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xbf216ffb, 0x3eee8795, 0x3da7e970, 0x3f51794e, 0xbf57bf7d, + 0x3f3651b3, 0x3e9e1c9e, 0xbee26e67, 0x3dfce96c, 0x3ebc9748, + 0xbf0e1298, 0x3f0ddd6a, 0x3f53fdd3, 0x3edab544, 0x3e569f3d, + 0x3e1518d9); + + VSET(16, e64, m8); + // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, + // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, + // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, + // -0.8054516243685412, 0.8701363884969631, + // -0.3585976675814562, 0.4150155349314333, + // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, + 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, + 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, + 0x3fda8f9d51773268, 0xbfe61b2f86f58c4a, 0x3feaeb90ae3f72fc, + 0xbfd57ce0a6d3c3f8); + // 0.5881481456806663, -0.9882550591195853, -0.8483939717953815, + // -0.9684864200393222, -0.6743741213041285, 0.4372709704288931, + // -0.8339944484196176, -0.1519222509233684, + // 
-0.2540075520951832, 0.6661048539265222, 0.3013290199421905, + // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, + // -0.1242681642824526, -0.9006297759672148 + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, + 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, + 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, + 0xbfe6f8e079b1c412, 0x3fc4308d2bb5d1f0, 0xbfbfd009d586ef50, + 0xbfecd1f58932a7e4); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8344616273245185, 0.7077884806720691, -0.1882041492960900, + // -0.2751607560371576, 0.2338395078923734, -0.9938305657796487, + // -0.5345602642671559, 0.0887204597208056, 0.3045224871958914, + // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, + // -0.8471643748461517, 0.8077493118513845, 0.2789872574353331, + // 0.7073875082318823 + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 0xbfefcd75c2393d96, + 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, + 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, + 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, + 0x3fe6a2eb20ae8e42); + asm volatile("vfmadd.vv v8, v16, v24, v0.t"); + // -0.8344616273245185, -0.5731100645801621, -0.1882041492960900, + // -0.9694950747455855, 0.2338395078923734, -0.5225882703620045, + // -0.5345602642671559, -0.1696993409682697, 0.3045224871958914, + // 0.9839880198025914, 0.6818539464440989, -0.3853314870073767, + // -0.8471643748461517, -0.4002765447811873, 0.2789872574353331, + // -1.1381329534609521 (masked-off even elements keep their old v8 value) + VCMP_U64(6, v8, 0xbfeab3e8dee4061e, 0xbfe256eaeb0c2af6, 0xbfc81712d5195ee0, + 0xbfef061a88f54aac, 0x3fcdee73f7748a88, 0xbfe0b90b094f4be7, + 0xbfe11b1e209897fa, 0xbfc5b8b53fce44b1, 0x3fd37d4be2d9c9a4, + 0x3fef7cd47196a75e, 0x3fe5d1bf5e1407b4, 0xbfd8a94565b434f4, + 0xbfeb1bf876899dc0, 0xbfd99e21834c3aa4, 0x3fd1daed5eabdf0c, + 
0xbff235cae659d5ba); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.6299 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x390a); + // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, + // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, + // -0.8569, 0.8271 + VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); + // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, + // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, -0.9360, + // -0.1609, -0.2527 + VLOAD_16(v2, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); + asm volatile("vfmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.3560, 0.1750, 1.1924, -0.2690, -0.9570, 0.7163, 0.5547, + // -0.2002, 0.2527, 0.3711, 1.0195, 1.0000, 0.3469, 0.2842, + // -0.9580, 0.6680 + VCMP_U16(7, v2, 0xb5b2, 0x319a, 0x3cc5, 0xb44e, 0xbba8, 0x39bb, 0x3870, + 0xb269, 0x340b, 0x35f0, 0x3c15, 0x3c00, 0x358d, 0x348b, 0xbbab, + 0x3958); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80368215 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4dbe1d); + // 0.13072050, -0.19741143, 0.09370349, 0.41049519, + // -0.69910282, -0.90573430, 0.86481184, 0.33341369, + // 0.30657578, -0.90526944, -0.97891974, -0.50830764, + // 0.79750061, 0.96885878, 0.48752418, 0.64305341 + VLOAD_32(v8, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, + 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, + 0x3f249f26); + // -0.61117887, 0.81778014, -0.46267223, -0.30897874, + // -0.84296966, 0.50125730, 0.96147668, 0.65802389, + // 0.19629262, -0.73197508, -0.06948850, -0.60436314, + // -0.80817568, 0.72047287, -0.78180677, -0.40237895 + VLOAD_32(v4, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + 
0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, + 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, + 0xbece049d); + asm volatile("vfmadd.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.36047307, 0.45982391, -0.27813792, 0.16217449, + // -1.37658250, -0.50288272, 1.63753343, 0.86225569, + // 0.46433264, -1.49354482, -1.03476644, -0.99402350, 0.14798427, + // 1.54788995, -0.14079997, 0.31966865 + VCMP_U32(8, v4, 0xbeb88fed, 0x3eeb6e09, 0xbe8e6818, 0x3e261112, 0xbfb033db, + 0xbf00bced, 0x3fd19ab2, 0x3f5cbccb, 0x3eedbd02, 0xbfbf2c79, + 0xbf84733a, 0xbf7e7853, 0x3e17892e, 0x3fc62142, 0xbe102ddd, + 0x3ea3ab9c); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5717861827636179 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); + // -0.0978999279373105, -0.8066508697961206, 0.1001298330116740, + // 0.6183499729699258, -0.7091158569428311, 0.4713309006442494, + // -0.9363173157138223, 0.6720957973877764, + // -0.1684678230133414, -0.0206783343753454, + // -0.6941474840922310, 0.4809970389518419, + // -0.4671263490725479, 0.5176516826232249, + // -0.9714116214357187, 0.0212574845134876 + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, + 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, + 0xbf952cb38782ee00, 0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, + 0xbfdde565ea17744c, 0x3fe0909a42f2c184, 0xbfef15cdd320a9f0, + 0x3f95c485a31f6440); + // -0.8599787754583945, -0.8609648323347547, + // -0.5642848553711928, 0.8958493001041692, 0.3661374487395561, + // 0.1017901385891375, 0.2298954297904690, -0.4970717320718749, + // -0.5860844501192310, -0.6581386742527398, + // -0.8379133193505066, -0.6497652150347000, + // -0.4444119628309799, -0.8810041425660891, 0.4421772814931029, + // 0.0606105644967410 + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, + 0x3fcd6d36a4399740, 
0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, + 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, + 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, + 0x3faf0859109c77c0); + asm volatile("vfmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.5896239092143964, -1.2989386647705283, -0.2225204504323405, + // 1.1305842246079476, -0.4997635227612305, 0.5295330954311120, + // -0.8048662854791286, 0.3878770491466992, -0.5035828135241305, + // -0.3969929346554276, -1.1732547424504496, 0.1094702669545694, + // -0.7212349688741608, 0.0139056869464266, -0.7185807615459836, + // 0.0559137678222272 + VCMP_U64(9, v8, 0xbfe2de32f5e07f06, 0xbff4c873e8cb3071, 0xbfcc7b8cd4a627de, + 0x3ff216df7be108d1, 0xbfdffc20247f9130, 0x3fe0f1ef63e0d73b, + 0xbfe9c176f0b925be, 0x3fd8d2fa423d0f48, 0xbfe01d59b45d3de9, + 0xbfd968550dc5a3de, 0xbff2c5a6c3cb39b0, 0x3fbc063e50744ab8, + 0xbfe7145b5b8179a4, 0x3f8c7a95b54e78da, 0xbfe6fe9d14cbfe26, + 0x3faca0bab8629cdd); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.6299 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x390a); + // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, + // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, + // -0.8569, 0.8271 + VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, + // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, + // -0.9360, -0.1609, -0.2527 + VLOAD_16(v2, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); + asm volatile("vfmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(10, v2, 0x348d, 0x319a, 0x360b, 0xb44e, 0xbae4, 0x39bb, 0x369a, + 0xb269, 0xb958, 0x35f0, 0x32b3, 0x3c00, 0xb935, 0x348b, 0xb126, + 0x3958); + + 
VSET(16, e32, m4); + double dscalar_32; + // 0.80368215 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4dbe1d); + // 0.13072050, -0.19741143, 0.09370349, 0.41049519, + // -0.69910282, -0.90573430, 0.86481184, 0.33341369, + // 0.30657578, -0.90526944, -0.97891974, -0.50830764, + // 0.79750061, 0.96885878, 0.48752418, 0.64305341 + VLOAD_32(v8, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, + 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, + 0x3f249f26); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.61117887, 0.81778014, -0.46267223, -0.30897874, + // -0.84296966, 0.50125730, 0.96147668, 0.65802389, + // 0.19629262, -0.73197508, -0.06948850, -0.60436314, + // -0.80817568, 0.72047287, -0.78180677, -0.40237895 + VLOAD_32(v4, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + 0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, + 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, + 0xbece049d); + asm volatile("vfmadd.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(11, v4, 0xbf1c7638, 0x3eeb6e09, 0xbeece360, 0x3e261112, 0xbf57ccdc, + 0xbf00bced, 0x3f762356, 0x3f5cbccb, 0x3e4900ef, 0xbfbf2c79, + 0xbd8e4ffc, 0xbf7e7853, 0xbf4ee49a, 0x3fc62142, 0xbf48247d, + 0x3ea3ab9c); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5717861827636179 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); + // -0.0978999279373105, -0.8066508697961206, + // 0.1001298330116740, 0.6183499729699258, + // -0.7091158569428311, 0.4713309006442494, + // -0.9363173157138223, 0.6720957973877764, + // -0.1684678230133414, -0.0206783343753454, + // -0.6941474840922310, 0.4809970389518419, + // -0.4671263490725479, 0.5176516826232249, + // -0.9714116214357187, 0.0212574845134876 + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, + 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, + 0xbf952cb38782ee00, 
0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, + 0xbfdde565ea17744c, 0x3fe0909a42f2c184, 0xbfef15cdd320a9f0, + 0x3f95c485a31f6440); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8599787754583945, -0.8609648323347547, + // -0.5642848553711928, 0.8958493001041692, + // 0.3661374487395561, 0.1017901385891375, 0.2298954297904690, + // -0.4970717320718749, -0.5860844501192310, + // -0.6581386742527398, -0.8379133193505066, + // -0.6497652150347000, -0.4444119628309799, + // -0.8810041425660891, 0.4421772814931029, 0.0606105644967410 + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, + 0x3fcd6d36a4399740, 0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, + 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, + 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, + 0x3faf0859109c77c0); + asm volatile("vfmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfeb84f2357b2242, 0xbff4c873e8cb3071, 0xbfe20e9f1cee50cc, + 0x3ff216df7be108d1, 0x3fd76ecbc40b5864, 0x3fe0f1ef63e0d73b, + 0x3fcd6d36a4399740, 0x3fd8d2fa423d0f48, 0xbfe2c1342d3e9986, + 0xbfd968550dc5a3de, 0xbfead02f97efc88a, 0x3fbc063e50744ab8, + 0xbfdc713edf93dac0, 0x3f8c7a95b54e78da, 0x3fdc4ca1f0c30314, + 0x3faca0bab8629cdd); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c new file mode 100644 index 000000000..8612260a8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c @@ -0,0 +1,351 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, + // 0.8179, -0.5659 + VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); + // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, + // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, + // -0.2671, -0.6855 + VLOAD_16(v6, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); + asm volatile("vfmax.vv v2, v4, v6"); + // 0.6426, -0.4099, 0.5732, 0.2915, 0.5972, -0.1932, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.3701, 0.9546, + // 0.8179, -0.5659 + VCMP_U16(1, v2, 0x3924, 0xb68f, 0x3896, 0x34aa, 0x38c7, 0xb22f, 0x3a35, + 0x9c4d, 0x38e9, 0x3a47, 0xb845, 0x3587, 0x35ec, 0x3ba3, 0x3a8b, + 0xb887); + + VSET(16, e32, m4); + // -0.19589283, 0.64597517, -0.09556163, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, + // 0.96548969, 0.74523991, 0.81442171, 0.25644442, + // -0.92091519, 0.25139943, -0.77403748 + VLOAD_32(v8, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, + 0xbf462752); + // -0.58921623, 0.69345474, 0.64817399, -0.00869324, + // 0.15872470, -0.17028977, -0.99863762, -0.02739566, + // -0.08060763, 0.73060948, 0.62843031, 0.68798363, + // -0.35207590, 0.01353026, 0.25345275, -0.93635505 + VLOAD_32(v12, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, + 
0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, + 0xbf6fb4f7); + asm volatile("vfmax.vv v4, v8, v12"); + // -0.19589283, 0.69345474, 0.64817399, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.02739566, 0.09486515, + // 0.96548969, 0.74523991, 0.81442171, 0.25644442, 0.01353026, + // 0.25345275, -0.77403748 + VCMP_U32(2, v4, 0xbe489821, 0x3f318640, 0x3f25eebb, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbce06cdd, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0x3c5dae02, 0x3e81c48f, + 0xbf462752); + + VSET(16, e64, m8); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, + // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 0xbfbfef785b1ada80, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfc403d135652390, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); + // -0.5461826062085420, -0.4431702866722571, -0.7458438472286320, + // -0.8611805160192025, 0.5288841839862100, 0.4836992661145783, + // -0.5942889927274901, 0.5287333894552471, 0.3093279352228719, + // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, + // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, + // -0.5143497066150833 + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, + 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, + 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, + 0xbfe0758d8413ceaa); + asm 
volatile("vfmax.vv v8, v16, v24"); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, 0.5288841839862100, 0.4836992661145783, + // 0.5680045000966327, 0.9515829310663801, 0.3093279352228719, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.0785069829906857, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VCMP_U64(3, v8, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0x3fd3cc0765615f4c, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfb419089c73df20, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, + // 0.8179, -0.5659 + VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); + // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, + // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, + // -0.2671, -0.6855 + VLOAD_16(v6, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmax.vv v2, v4, v6, v0.t"); + // 0.0000, -0.4099, 0.0000, 0.2915, 0.0000, -0.1932, 0.0000, + // -0.0042, 0.0000, 0.7847, 0.0000, 0.3455, 0.0000, 0.9546, + // 0.0000, -0.5659 + VCMP_U16(4, v2, 0x0, 0xb68f, 0x0, 0x34aa, 0x0, 0xb22f, 0x0, 0x9c4d, 0x0, + 0x3a47, 0x0, 0x3587, 0x0, 0x3ba3, 0x0, 0xb887); + + VSET(16, e32, m4); + // -0.19589283, 0.64597517, -0.09556163, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, + // 
0.96548969, 0.74523991, 0.81442171, 0.25644442, + // -0.92091519, 0.25139943, -0.77403748 + VLOAD_32(v8, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, + 0xbf462752); + // -0.58921623, 0.69345474, 0.64817399, -0.00869324, + // 0.15872470, -0.17028977, -0.99863762, -0.02739566, + // -0.08060763, 0.73060948, 0.62843031, 0.68798363, + // -0.35207590, 0.01353026, 0.25345275, -0.93635505 + VLOAD_32(v12, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, + 0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, + 0xbf6fb4f7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmax.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.69345474, 0.00000000, 0.96582597, + // 0.00000000, 0.78331935, 0.00000000, -0.02739566, + // 0.00000000, 0.96548969, 0.00000000, 0.81442171, + // 0.00000000, 0.01353026, 0.00000000, -0.77403748 + VCMP_U32(5, v4, 0x0, 0x3f318640, 0x0, 0x3f77405f, 0x0, 0x3f48879e, 0x0, + 0xbce06cdd, 0x0, 0x3f772a55, 0x0, 0x3f507df1, 0x0, 0x3c5dae02, 0x0, + 0xbf462752); + + VSET(16, e64, m8); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, + // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 0xbfbfef785b1ada80, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfc403d135652390, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); + // -0.5461826062085420, -0.4431702866722571, -0.7458438472286320, + // 
-0.8611805160192025, 0.5288841839862100, 0.4836992661145783, + // -0.5942889927274901, 0.5287333894552471, 0.3093279352228719, + // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, + // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, + // -0.5143497066150833 + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, + 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, + 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, + 0xbfe0758d8413ceaa); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmax.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.2985478109200665, 0.0000000000000000, + // -0.2169778494878496, 0.0000000000000000, 0.4836992661145783, + // 0.0000000000000000, 0.9515829310663801, 0.0000000000000000, + // 0.0055288881366042, 0.0000000000000000, 0.0982171502328268, + // 0.0000000000000000, 0.9555697921812856, 0.0000000000000000, + // -0.1835757691555060 + VCMP_U64(6, v8, 0x0, 0xbfd31b68470c6bc4, 0x0, 0xbfcbc5ee1fc0dc58, 0x0, + 0x3fdef4edc443ec94, 0x0, 0x3fee735e0c0b94e4, 0x0, 0x3f76a5759bade800, + 0x0, 0x3fb924c2582803f0, 0x0, 0x3fee940719ceda38, 0x0, + 0xbfc77f692a6e3368); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.0368 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x28b5); + // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, + // -0.2445, -0.4031 + VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + 0xb5a8, 0xb11c, 0xb832, 0x3af5, 0x3195, 0x2d14, 0xb3d3, 0xb673); + asm volatile("vfmax.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.0368, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, 0.0368, 0.0368, 0.0368, 0.8696, 0.1744, 0.0793, + // 0.0368, 0.0368 + VCMP_U16(7, v2, 
0x28b5, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, + 0x3a90, 0x28b5, 0x28b5, 0x28b5, 0x3af5, 0x3195, 0x2d14, 0x28b5, + 0x28b5); + + VSET(16, e32, m4); + double dscalar_32; + // -0.94383347 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf719f12); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VLOAD_32(v8, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + asm volatile("vfmax.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VCMP_U32(8, v4, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + + VSET(16, e64, m8); + double dscalar_64; + // -0.8274885128397702 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfea7ac9308eccb6); + // 0.9632225672084347, 0.4671677538923853, + // -0.1749283847947720, -0.0938698612480795, + // 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, + // 0.1886883982201846, 0.9268486384654282, + // -0.9639662652720637, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 
0xbfe6675ebf9ca482); + asm volatile("vfmax.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.9632225672084347, 0.4671677538923853, -0.1749283847947720, + // -0.0938698612480795, 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, 0.1886883982201846, + // 0.9268486384654282, -0.8274885128397702, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VCMP_U64(9, v8, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfea7ac9308eccb6, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 0xbfe6675ebf9ca482); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.0368 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x28b5); + // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, + // -0.2445, -0.4031 + VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + 0xb5a8, 0xb11c, 0xb832, 0x3af5, 0x3195, 0x2d14, 0xb3d3, 0xb673); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmax.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.6772, 0.0000, 0.4421, 0.0000, 0.6250, 0.0000, + // 0.8203, 0.0000, 0.0368, 0.0000, 0.8696, 0.0000, 0.0793, + // 0.0000, 0.0368 + VCMP_U16(10, v2, 0x0, 0x396b, 0x0, 0x3713, 0x0, 0x3900, 0x0, 0x3a90, 0x0, + 0x28b5, 0x0, 0x3af5, 0x0, 0x2d14, 0x0, 0x28b5); + + VSET(16, e32, m4); + double dscalar_32; + // -0.94383347 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf719f12); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VLOAD_32(v8, 
0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmax.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.31252080, 0.00000000, 0.13738893, + // 0.00000000, -0.33637357, 0.00000000, 0.95792335, + // 0.00000000, 0.27496886, 0.00000000, -0.19357064, + // 0.00000000, -0.53921199, 0.00000000, 0.28428423 + VCMP_U32(11, v4, 0x0, 0x3ea002ba, 0x0, 0x3e0cafaf, 0x0, 0xbeac3928, 0x0, + 0x3f753a77, 0x0, 0x3e8cc8b8, 0x0, 0xbe463762, 0x0, 0xbf0a09cc, 0x0, + 0x3e918db4); + + VSET(16, e64, m8); + double dscalar_64; + // -0.8274885128397702 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfea7ac9308eccb6); + // 0.9632225672084347, 0.4671677538923853, + // -0.1749283847947720, -0.0938698612480795, + // 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, + // 0.1886883982201846, 0.9268486384654282, + // -0.9639662652720637, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 0xbfe6675ebf9ca482); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmax.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.4671677538923853, 0.0000000000000000, + // -0.0938698612480795, 0.0000000000000000, + // 0.2938331380713377, 0.0000000000000000, 0.6841623039857032, + // 0.0000000000000000, 0.4712155929452235, 0.0000000000000000, + // 0.9268486384654282, 0.0000000000000000, + // -0.2101071651393955, 0.0000000000000000, + // -0.7001184217853196 + 
VCMP_U64(12, v8, 0x0, 0x3fdde613942dab28, 0x0, 0xbfb807daf023fbb0, 0x0, + 0x3fd2ce29819fd630, 0x0, 0x3fe5e4a85818c992, 0x0, 0x3fde2865724428b0, + 0x0, 0x3feda8be79d1a2f4, 0x0, 0xbfcae4caa576e8a8, 0x0, + 0xbfe6675ebf9ca482); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c new file mode 100644 index 000000000..7188a6008 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c @@ -0,0 +1,94 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.1481, -0.1797, -0.5454, 0.3228, 0.3237, -0.7212, -0.5195, + // -0.4500, 0.2681, 0.7300, 0.5059, 0.5830, 0.3198, -0.1713, + // -0.6431, 0.4841 + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v2, v4, %[A], v0" ::[A] "f"(dscalar_16)); + // -0.9380, -0.9380, -0.9380, -0.9380, 0.3237, -0.7212, + // -0.5195, -0.4500, 0.2681, -0.9380, 0.5059, -0.9380, 0.3198, + // -0.9380, -0.6431, -0.9380 + VCMP_U16(1, v2, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0x352e, 0xb9c5, 0xb828, + 0xb733, 0x344a, 0xbb81, 0x380c, 0xbb81, 0x351e, 0xbb81, 0xb925, + 0xbb81); + + VSET(16, e32, m4); + // 0.86539453, -0.53925377, -0.47128764, 0.99265540, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.26464799, 0.77351445, -0.21725702, + // -0.25191557, -0.53123665, 0.80404943, 0.81841671 + VLOAD_32(v8, 
0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v4, v8, %[A], v0" ::[A] "f"(dscalar_32)); + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.96056187, 0.77351445, -0.96056187, + // -0.25191557, -0.96056187, 0.80404943, -0.96056187 + VCMP_U32(2, v4, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbf75e762, + 0x3f46050b, 0xbf75e762, 0xbe80fb14, 0xbf75e762, 0x3f4dd62f, + 0xbf75e762); + + VSET(16, e64, m8); + // -0.3488917150781869, -0.4501495513738740, 0.8731197104152684, + // 0.3256432550932964, 0.6502591178769535, -0.3169358689246526, + // -0.5396694979141685, -0.5417807430937591, + // -0.7971574213160249, -0.1764794100111047, 0.3564275916066595, + // -0.3754449946313438, 0.6580947137446858, + // -0.3328857144699515, 0.1761214464164236, 0.1429774118511240 + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v8, v16, %[A], v0" ::[A] "f"(dscalar_64)); + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.6502591178769535, + // -0.3169358689246526, -0.5396694979141685, + // -0.5417807430937591, -0.7971574213160249, + // 0.9108707261227378, 0.3564275916066595, 
0.9108707261227378, + // 0.6580947137446858, 0.9108707261227378, 0.1761214464164236, + // 0.9108707261227378 + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0x3fed25da5d7296fe, 0x3fd6cfb5ac55edec, 0x3fed25da5d7296fe, + 0x3fe50f1ca5268668, 0x3fed25da5d7296fe, 0x3fc68b25c63dcaf0, + 0x3fed25da5d7296fe); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c new file mode 100644 index 000000000..401879889 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c @@ -0,0 +1,348 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, + // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, + // -0.9922, -0.6851 + VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + 0xbbeb, 0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); + // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, + // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, + // 0.7026, -0.8535 + VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); + asm volatile("vfmin.vv v2, v4, v6"); + // 0.9390, -0.1069, 0.7070, -0.7305, 0.0516, -0.4492, -0.3562, + // 0.0230, -0.9897, -0.8652, -0.3865, -0.6201, -0.7021, 0.1664, + // -0.9922, -0.8535 + VCMP_U16(1, v2, 0x3b83, 0xaed8, 
0x39a8, 0xb9d8, 0x2a9a, 0xb730, 0xb5b3, + 0x25e2, 0xbbeb, 0xbaec, 0xb62f, 0xb8f6, 0xb99e, 0x3153, 0xbbf0, + 0xbad4); + + VSET(16, e32, m4); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.47814348, 0.77408481, -0.54823470, + // -0.72419900, 0.27495387, -0.76835793, 0.71516198, + // 0.32305571, -0.76598656, -0.36499983, -0.52954155 + VLOAD_32(v8, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, + 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, + 0xbf079009); + // 0.87184614, 0.40005061, 0.40118238, 0.97373396, + // 0.74085194, -0.99458516, -0.73125440, -0.46319291, + // -0.76140571, -0.82557100, 0.15205561, 0.39971715, + // -0.32876521, -0.53106725, 0.84727478, 0.21940185 + VLOAD_32(v12, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, + 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, + 0x3e60aae1); + asm volatile("vfmin.vv v4, v8, v12"); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.99458516, -0.73125440, -0.54823470, + // -0.76140571, -0.82557100, -0.76835793, 0.39971715, + // -0.32876521, -0.76598656, -0.36499983, -0.52954155 + VCMP_U32(2, v4, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbf7e9d22, 0xbf3b337d, 0xbf0c591c, 0xbf42eb7c, 0xbf53589f, + 0xbf44b31b, 0x3ecca7ba, 0xbea853ea, 0xbf4417b2, 0xbebae142, + 0xbf079009); + + VSET(16, e64, m8); + // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, + // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.5394596797016060, 0.7861587828590215, 0.0194772848204161, + // -0.9126826319328591, 0.3997583898469530 + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 
0xbfc07edbdd68bb68, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, + 0x3fe9283676baf718, 0x3f93f1da754635c0, 0xbfed34b234f8ec38, + 0x3fd995a436ac6f1c); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.7335258472548418, 0.9800819535502201, -0.6873536121819364, + // -0.7090903925273744, 0.7813319828098306, 0.6810234154055235, + // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, + // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, + // 0.4114236885680320 + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, + 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, + 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, + 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fda54c405ccc2c0); + asm volatile("vfmin.vv v8, v16, v24"); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.3621349692771021, 0.5392486258321831, -0.6873536121819364, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.0942028280153233, 0.7861587828590215, -0.4549651855719854, + // -0.9663401540020158, 0.3997583898469530 + VCMP_U64(3, v8, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfe5fecd00a37bfa, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fb81dad31843b10, + 0x3fe9283676baf718, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fd995a436ac6f1c); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, + // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, + // -0.9922, -0.6851 + 
VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + 0xbbeb, 0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); + // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, + // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, + // 0.7026, -0.8535 + VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmin.vv v2, v4, v6, v0.t"); + // 0.0000, -0.1069, 0.0000, -0.7305, 0.0000, -0.4492, 0.0000, + // 0.0230, 0.0000, -0.8652, 0.0000, -0.6201, 0.0000, 0.1664, + // 0.0000, -0.8535 + VCMP_U16(4, v2, 0x0, 0xaed8, 0x0, 0xb9d8, 0x0, 0xb730, 0x0, 0x25e2, 0x0, + 0xbaec, 0x0, 0xb8f6, 0x0, 0x3153, 0x0, 0xbad4); + + VSET(16, e32, m4); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.47814348, 0.77408481, -0.54823470, + // -0.72419900, 0.27495387, -0.76835793, 0.71516198, + // 0.32305571, -0.76598656, -0.36499983, -0.52954155 + VLOAD_32(v8, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, + 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, + 0xbf079009); + // 0.87184614, 0.40005061, 0.40118238, 0.97373396, + // 0.74085194, -0.99458516, -0.73125440, -0.46319291, + // -0.76140571, -0.82557100, 0.15205561, 0.39971715, + // -0.32876521, -0.53106725, 0.84727478, 0.21940185 + VLOAD_32(v12, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, + 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, + 0x3e60aae1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmin.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.14129849, 0.00000000, 0.83600986, + // 0.00000000, -0.99458516, 0.00000000, -0.54823470, + // 0.00000000, -0.82557100, 0.00000000, 0.39971715, + // 0.00000000, -0.76598656, 0.00000000, -0.52954155 + 
VCMP_U32(5, v4, 0x0, 0xbe10b08d, 0x0, 0x3f5604be, 0x0, 0xbf7e9d22, 0x0, + 0xbf0c591c, 0x0, 0xbf53589f, 0x0, 0x3ecca7ba, 0x0, 0xbf4417b2, 0x0, + 0xbf079009); + + VSET(16, e64, m8); + // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, + // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.5394596797016060, 0.7861587828590215, 0.0194772848204161, + // -0.9126826319328591, 0.3997583898469530 + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfc07edbdd68bb68, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, + 0x3fe9283676baf718, 0x3f93f1da754635c0, 0xbfed34b234f8ec38, + 0x3fd995a436ac6f1c); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.7335258472548418, 0.9800819535502201, -0.6873536121819364, + // -0.7090903925273744, 0.7813319828098306, 0.6810234154055235, + // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, + // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, + // 0.4114236885680320 + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, + 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, + 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, + 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fda54c405ccc2c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmin.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.7218925128932789, 0.0000000000000000, + // 0.3621349692771021, 0.0000000000000000, -0.6873536121819364, + // 0.0000000000000000, -0.9378576380992047, 0.0000000000000000, + // 0.1016628884386184, 0.0000000000000000, 0.0942028280153233, + // 0.0000000000000000, -0.4549651855719854, 0.0000000000000000, 
+ // 0.3997583898469530 + VCMP_U64(6, v8, 0x0, 0x3fe719be53c35314, 0x0, 0x3fd72d3826721e9c, 0x0, + 0xbfe5fecd00a37bfa, 0x0, 0xbfee02ee057e1390, 0x0, 0x3fba06943d0f8e20, + 0x0, 0x3fb81dad31843b10, 0x0, 0xbfdd1e264c366a78, 0x0, + 0x3fd995a436ac6f1c); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.4434 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3718); + // -0.2537, 0.5449, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, + // 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, -0.5249, + // -0.9839, 0.4875 + VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); + asm volatile("vfmin.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.2537, 0.4434, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, + // 0.4434, 0.4434, 0.4434, 0.4434, -0.8525, 0.4434, -0.5249, + // -0.9839, 0.4434 + VCMP_U16(7, v2, 0xb40f, 0x3718, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, + 0x3718, 0x3718, 0x3718, 0x3718, 0xbad2, 0x3718, 0xb833, 0xbbdf, + 0x3718); + + VSET(16, e32, m4); + double dscalar_32; + // 0.59499639 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f1851af); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.97777683, -0.91671157, + // 0.81712914, -0.10151604, 0.03442690, -0.14597759 + VLOAD_32(v8, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + asm volatile("vfmin.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.59499639, -0.91671157, + // 0.59499639, -0.10151604, 0.03442690, -0.14597759 + VCMP_U32(8, v4, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, 
+ 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f1851af, 0xbf6aad9c, 0x3f1851af, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8631130564395617 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); + // -0.0203711476424431, 0.4824066080711997, 0.5516514149436702, + // -0.0992829085793798, 0.7425996730256406, 0.3080149644930992, + // -0.6753031265127754, -0.3309631180416657, + // -0.7695072044924456, -0.6726760621514143, + // -0.9995830020822822, 0.2485224245452053, 0.7025040357726613, + // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); + asm volatile("vfmin.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.0203711476424431, 0.4824066080711997, 0.5516514149436702, + // -0.0992829085793798, 0.7425996730256406, 0.3080149644930992, + // -0.6753031265127754, -0.3309631180416657, -0.7695072044924456, + // -0.6726760621514143, -0.9995830020822822, 0.2485224245452053, + // 0.7025040357726613, -0.6452676560401207, 0.5090044889036880, + // 0.0801949752856408 + VCMP_U64(9, v8, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.4434 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3718); + // -0.2537, 0.5449, 0.2070, -0.5752, -0.3008, 0.0165, + // 
-0.8447, 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, + // -0.5249, -0.9839, 0.4875 + VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmin.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.4434, 0.0000, -0.5752, 0.0000, 0.0165, 0.0000, + // 0.4434, 0.0000, 0.4434, 0.0000, -0.8525, 0.0000, + // -0.5249, 0.0000, 0.4434 + VCMP_U16(10, v2, 0x0, 0x3718, 0x0, 0xb89a, 0x0, 0x2437, 0x0, 0x3718, 0x0, + 0x3718, 0x0, 0xbad2, 0x0, 0xb833, 0x0, 0x3718); + + VSET(16, e32, m4); + double dscalar_32; + // 0.59499639 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f1851af); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.97777683, -0.91671157, + // 0.81712914, -0.10151604, 0.03442690, -0.14597759 + VLOAD_32(v8, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmin.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.52559608, 0.00000000, -0.67824948, + // 0.00000000, -0.67564118, 0.00000000, 0.16465612, + // 0.00000000, -0.01763406, 0.00000000, -0.91671157, + // 0.00000000, -0.10151604, 0.00000000, -0.14597759 + VCMP_U32(11, v4, 0x0, 0xbf068d77, 0x0, 0xbf2da1c2, 0x0, 0xbf2cf6d2, 0x0, + 0x3e289b9d, 0x0, 0xbc90754e, 0x0, 0xbf6aad9c, 0x0, 0xbdcfe7a4, 0x0, + 0xbe157b26); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8631130564395617 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); + // -0.0203711476424431, 0.4824066080711997, + // 0.5516514149436702, -0.0992829085793798, 0.7425996730256406, + // 0.3080149644930992, -0.6753031265127754, + // -0.3309631180416657, -0.7695072044924456, + // 
-0.6726760621514143, -0.9995830020822822, + // 0.2485224245452053, 0.7025040357726613, + // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmin.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.4824066080711997, 0.0000000000000000, + // -0.0992829085793798, 0.0000000000000000, + // 0.3080149644930992, 0.0000000000000000, + // -0.3309631180416657, 0.0000000000000000, + // -0.6726760621514143, 0.0000000000000000, + // 0.2485224245452053, 0.0000000000000000, + // -0.6452676560401207, 0.0000000000000000, 0.0801949752856408 + VCMP_U64(12, v8, 0x0, 0x3fdedfbff74290e0, 0x0, 0xbfb96a9acd667320, 0x0, + 0x3fd3b68465cb4b28, 0x0, 0xbfd52e7fee0af3fc, 0x0, 0xbfe5868ff2f7c1a4, + 0x0, 0x3fcfcf9532df44b8, 0x0, 0xbfe4a6085afb7c12, 0x0, + 0x3fb487a86c27c560); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c new file mode 100644 index 000000000..dedcbaf58 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c @@ -0,0 +1,454 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, + // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, + // 0.8252, -0.5947 + VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); + // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, + // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, + // 0.8936, 0.3611 + VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); + // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, + // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, + // 0.8823, -0.8911, 0.7603 + VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); + asm volatile("vfmsac.vv v2, v4, v6"); + // -0.2200, 0.1312, -0.2920, -0.4683, -1.0742, 0.5117, -0.7031, + // -0.5420, 0.2430, -0.6357, 0.5083, -0.7349, -0.6904, -0.8276, + // 1.6289, -0.9751 + VCMP_U16(1, v2, 0xb30a, 0x3033, 0xb4ac, 0xb77e, 0xbc4c, 0x3818, 0xb9a0, + 0xb856, 0x33c7, 0xb916, 0x3811, 0xb9e1, 0xb985, 0xba9f, 0x3e84, + 0xbbcd); + + VSET(16, e32, m4); + // -0.90310860, 0.30282700, 0.54854167, -0.38732994, + // 0.92121714, 0.99595129, -0.10263380, 0.83759040, + // -0.23468767, 0.03914077, -0.46234205, 0.38326120, + // 0.36417511, -0.50103557, 0.36991179, 0.44718841 + VLOAD_32(v8, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, + 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 0x3ebd6514, + 0x3ee4f5e1); + // 0.84989786, -0.04543342, -0.74596256, -0.30687407, + // -0.30795863, 0.57084304, 
0.51653886, -0.97366458, + // 0.49300706, 0.62932760, 0.45846274, -0.73850167, + // -0.42686453, -0.31419462, -0.47245970, -0.87721694 + VLOAD_32(v12, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, + 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, + 0xbf60914a); + // -0.76813585, 0.87161541, -0.67958647, -0.98584491, + // 0.12284227, -0.04006640, -0.93113720, -0.93526161, + // -0.27461481, 0.64110506, 0.61687475, -0.43741968, + // 0.70502371, 0.37014356, -0.98105848, 0.77993429 + VLOAD_32(v4, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, + 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, + 0x3f47a9c6); + asm volatile("vfmsac.vv v4, v8, v12"); + // 0.00058579, -0.88537389, 0.27039492, 1.10470641, + // -0.40653905, 0.60859829, 0.87812287, 0.11972952, + // 0.15891212, -0.61647266, -0.82884133, 0.15438065, + // -0.86047715, -0.21272089, 0.80629003, -1.17221546 + VCMP_U32(2, v4, 0x3a198f11, 0xbf62a7dd, 0x3e8a7134, 0x3f8d6705, 0xbed025e3, + 0x3f1bcd19, 0x3f60cca9, 0x3df534be, 0x3e22b9dd, 0xbf1dd127, + 0xbf542ef3, 0x3e1e15f6, 0xbf5c483b, 0xbe59d381, 0x3f4e6907, + 0xbf960b29); + + VSET(16, e64, m8); + // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, + // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, + // -0.9954501284062294, -0.2761157935600853, + // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, + // 0.9188774299961215, -0.4297410504980956, 0.2729294776457381, + // -0.1419575372981836, -0.8472908703054822 + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, + 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, + 0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, + 0xbfdb80e09b68d514, 0x3fd177ad33269468, 0xbfc22baa220ee628, + 0xbfeb1d01be452a62); + // 
-0.2416734667201210, -0.2737616510555549, 0.6084509432766920, + // -0.4000545529138850, 0.5985258122916897, -0.9559409603601607, + // 0.5010970610326939, 0.5772808284477746, -0.4551243154247406, + // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, + // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, + // 0.7725843936650791 + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, + 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, + 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, + 0x3fd480faf7036588, 0x3fb1d3b7ce8e6640, 0x3fec6c9c280952c6, + 0x3fe8b902e80620ce); + // -0.8982912058335177, 0.5582779858188844, -0.3988318240568800, + // 0.0267896464795028, 0.8241806039831361, 0.2839220639224551, + // -0.4781090814672235, -0.1240154287362147, 0.4586341020154134, + // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, + // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, + // 0.7323810741358745 + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, + 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, + 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 0x3fdc4b290fd48cd4, + 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, + 0x3fe76faa6f33ef10); + asm volatile("vfmsac.vv v8, v16, v24"); + // 0.8748168483008159, -0.7335910925744179, 0.6104597169258920, + // 0.1613325712615846, -0.2937408044039052, -0.8725789038271781, + // -0.0207080522817558, -0.0353809253176660, + // -0.1314202134325006, 0.4616552479316257, + // -0.1146471705942074, -0.6456241806340295, + // -0.9633640140188219, -0.0452482140478748, 0.1751535385353780, + // -1.3869847774287927 + VCMP_U64(3, v8, 0x3febfe7fe72e2334, 0xbfe7799406e7cf1f, 0x3fe388e2d0f71ba3, + 0x3fc4a68bb2ac8e62, 0xbfd2cca63b1a97a5, 0xbfebec2a97e3c096, + 0xbf95347ddd418906, 0xbfa21d72da487f01, 
0xbfc0d260a75ceb46, + 0x3fdd8bc273f9289e, 0xbfbd59845847f323, 0xbfe4a8f40aaa8efa, + 0xbfeed3e0c4cb54ff, 0xbfa72ac61f1378a7, 0x3fc66b6e5fe4e141, + 0xbff63116f331b43d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, + // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, + // 0.8252, -0.5947 + VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); + // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, + // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, + // 0.8936, 0.3611 + VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, + // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, + // 0.8823, -0.8911, 0.7603 + VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); + asm volatile("vfmsac.vv v2, v4, v6, v0.t"); + // 0.4780, 0.1312, 0.2800, -0.4683, 0.6484, 0.5117, 0.5947, + // -0.5420, -0.1874, -0.6357, -0.4280, -0.7349, 0.5444, + // -0.8276, -0.8911, -0.9751 + VCMP_U16(4, v2, 0x37a6, 0x3033, 0x347b, 0xb77e, 0x3930, 0x3818, 0x38c2, + 0xb856, 0xb1ff, 0xb916, 0xb6d9, 0xb9e1, 0x385b, 0xba9f, 0xbb21, + 0xbbcd); + + VSET(16, e32, m4); + // -0.90310860, 0.30282700, 0.54854167, -0.38732994, + // 0.92121714, 0.99595129, -0.10263380, 0.83759040, + // -0.23468767, 0.03914077, -0.46234205, 0.38326120, + // 0.36417511, -0.50103557, 0.36991179, 0.44718841 + VLOAD_32(v8, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, + 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 
0x3ebd6514, + 0x3ee4f5e1); + // 0.84989786, -0.04543342, -0.74596256, -0.30687407, + // -0.30795863, 0.57084304, 0.51653886, -0.97366458, + // 0.49300706, 0.62932760, 0.45846274, -0.73850167, + // -0.42686453, -0.31419462, -0.47245970, -0.87721694 + VLOAD_32(v12, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, + 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, + 0xbf60914a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.76813585, 0.87161541, -0.67958647, -0.98584491, + // 0.12284227, -0.04006640, -0.93113720, -0.93526161, + // -0.27461481, 0.64110506, 0.61687475, -0.43741968, + // 0.70502371, 0.37014356, -0.98105848, 0.77993429 + VLOAD_32(v4, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, + 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, + 0x3f47a9c6); + asm volatile("vfmsac.vv v4, v8, v12, v0.t"); + // -0.76813585, -0.88537389, -0.67958647, 1.10470641, + // 0.12284227, 0.60859829, -0.93113720, 0.11972952, + // -0.27461481, -0.61647266, 0.61687475, 0.15438065, + // 0.70502371, -0.21272089, -0.98105848, -1.17221546 + VCMP_U32(5, v4, 0xbf44a48d, 0xbf62a7dd, 0xbf2df961, 0x3f8d6705, 0x3dfb94bb, + 0x3f1bcd19, 0xbf6e5f02, 0x3df534be, 0xbe8c9a50, 0xbf1dd127, + 0x3f1deb81, 0x3e1e15f6, 0x3f347c6f, 0xbe59d381, 0xbf7b26a6, + 0xbf960b29); + + VSET(16, e64, m8); + // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, + // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, + // -0.9954501284062294, -0.2761157935600853, + // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, + // 0.9188774299961215, -0.4297410504980956, 0.2729294776457381, + // -0.1419575372981836, -0.8472908703054822 + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, + 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, + 
0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, + 0xbfdb80e09b68d514, 0x3fd177ad33269468, 0xbfc22baa220ee628, + 0xbfeb1d01be452a62); + // -0.2416734667201210, -0.2737616510555549, 0.6084509432766920, + // -0.4000545529138850, 0.5985258122916897, -0.9559409603601607, + // 0.5010970610326939, 0.5772808284477746, -0.4551243154247406, + // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, + // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, + // 0.7725843936650791 + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, + 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, + 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, + 0x3fd480faf7036588, 0x3fb1d3b7ce8e6640, 0x3fec6c9c280952c6, + 0x3fe8b902e80620ce); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8982912058335177, 0.5582779858188844, -0.3988318240568800, + // 0.0267896464795028, 0.8241806039831361, 0.2839220639224551, + // -0.4781090814672235, -0.1240154287362147, 0.4586341020154134, + // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, + // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, + // 0.7323810741358745 + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, + 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, + 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 0x3fdc4b290fd48cd4, + 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, + 0x3fe76faa6f33ef10); + asm volatile("vfmsac.vv v8, v16, v24, v0.t"); + // -0.8982912058335177, -0.7335910925744179, -0.3988318240568800, + // 0.1613325712615846, 0.8241806039831361, -0.8725789038271781, + // -0.4781090814672235, -0.0353809253176660, 0.4586341020154134, + // 0.4616552479316257, 0.1207753636997857, -0.6456241806340295, + // 0.8256868703569773, -0.0452482140478748, -0.3012484644971416, + // -1.3869847774287927 + VCMP_U64(6, v8, 
0xbfecbecd32eadc10, 0xbfe7799406e7cf1f, 0xbfd98675ea3b69b0, + 0x3fc4a68bb2ac8e62, 0x3fea5fb000835cf4, 0xbfebec2a97e3c096, + 0xbfde9956d534a0f8, 0xbfa21d72da487f01, 0x3fdd5a42d93f2348, + 0x3fdd8bc273f9289e, 0x3fbeeb225d40ca30, 0xbfe4a8f40aaa8efa, + 0x3fea6c06df1d6f14, 0xbfa72ac61f1378a7, 0xbfd347a7a3bf1174, + 0xbff63116f331b43d); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.3911 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3642); + // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, -0.8379, + // -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, -0.6919, 0.9189, + // 0.6245, 0.9932 + VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); + // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, -0.3606, + // -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, 0.0842, + // -0.6646, 0.1454, -0.3020 + VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + 0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); + asm volatile("vfmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.7451, -0.8232, 0.9790, -0.1062, 0.7974, 0.0955, 0.0330, + // 0.0521, 0.4797, -1.0498, 0.5278, -0.8901, -0.3547, 1.0234, + // 0.0989, 0.6904 + VCMP_U16(7, v2, 0xb9f6, 0xba96, 0x3bd5, 0xaecc, 0x3a61, 0x2e1b, 0x2836, + 0x2aac, 0x37ad, 0xbc33, 0x3839, 0xbb1f, 0xb5ad, 0x3c19, 0x2e54, + 0x3986); + + VSET(16, e32, m4); + double dscalar_32; + // -0.39704049 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbecb48e4); + // 0.43844241, 0.13734208, 0.15601240, 0.48965997, + // -0.41457745, -0.69918746, 0.38535324, 0.83301985, + // 0.79336989, -0.03326649, -0.85931808, 0.92554229, + // -0.77742523, 0.47821125, -0.53653014, -0.32442030 + VLOAD_32(v8, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, + 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, + 
0xbea61a6b); + // -0.73119336, 0.87333083, -0.16325396, -0.30275631, + // 0.34779423, 0.22721651, 0.47497734, -0.58483958, + // -0.24916913, 0.13750601, -0.99799657, 0.66137350, + // 0.58565408, 0.68887448, -0.74538875, 0.99311894 + VLOAD_32(v4, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, + 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, + 0x3f7e3d0b); + asm volatile("vfmsac.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 0.55711401, -0.92786121, 0.10131072, 0.10834149, + // -0.18319020, 0.05038923, -0.62797821, 0.25409698, + // -0.06583084, -0.12429786, 1.33918071, -1.02885127, + // -0.27698478, -0.87874371, 0.95841295, -0.86431098 + VCMP_U32(8, v4, 0x3f0e9f06, 0xbf6d8850, 0x3dcf7bff, 0x3ddde223, 0xbe3b9636, + 0x3d4e64ec, 0xbf20c32e, 0x3e821900, 0xbd86d252, 0xbdfe8fe1, + 0x3fab6a45, 0xbf83b166, 0xbe8dd0f3, 0xbf60f559, 0x3f755a8d, + 0xbf5d437b); + + VSET(16, e64, m8); + double dscalar_64; + // 0.0070730785066928 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); + // -0.3542445595658672, 0.5861662785402695, 0.5713440701195280, + // 0.2841717566030781, 0.4022451154567073, -0.9804628417449213, + // -0.9703836833708208, 0.2288593539727362, 0.1806576644288407, + // 0.4892172254017777, 0.9508074316559227, -0.9022151172016701, + // -0.7929839752648156, 0.5513143449560454, 0.4823446191982377, + // -0.7486658065787619 + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, + 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 0x3fc71fca543f2eb8, + 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, + 0xbfe9601fee00766c, 0x3fe1a45dfb2cdc2e, 0x3fdedebbf736e98c, + 0xbfe7f511fe5c74b0); + // -0.7603855538897846, -0.0491604902215765, + // -0.3714656077097227, -0.6096204185796581, + // -0.2818689596683441, 0.3527700521309320, 0.1176602936422064, + // -0.2049443830034134, -0.6926950556538125, 0.7269529331298494, 
+ // -0.2107692441818434, 0.1746722346734710, + // -0.5298547863982788, 0.2397543330794352, + // -0.8347981409736787, -0.6198539479673024 + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, + 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, + 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, + 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, + 0xbfe3d5d7f25a1d14); + asm volatile("vfmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // 0.7578799543094064, 0.0533064903276678, 0.3755067691720115, + // 0.6116303877234964, 0.2847140709489031, -0.3597049427834890, + // -0.1245238936163020, 0.2065631231810536, 0.6939728614971534, + // -0.7234926612877562, 0.2174943797906927, + // -0.1810536730273635, 0.5242459484866814, + // -0.2358548434356951, 0.8382098023325486, 0.6145585759420944 + VCMP_U64(9, v8, 0x3fe8408d7641b126, 0x3fab4afd013e6639, 0x3fd8084d8b414e68, + 0x3fe39279e4106415, 0x3fd238c15ddbf0a4, 0xbfd70567e15dbc9b, + 0xbfbfe0cc42a710ce, 0x3fca70a9114fa5b7, 0x3fe63506930e2352, + 0xbfe726da14e40fb7, 0x3fcbd6db1821e5e6, 0xbfc72cc44a3c91ef, + 0x3fe0c69f7079f20d, 0xbfce307dd3946ada, 0x3fead29d5d068eb6, + 0x3fe3aa76bf24b95e); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.3911 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3642); + // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, + // -0.8379, -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, + // -0.6919, 0.9189, 0.6245, 0.9932 + VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, + // -0.3606, -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, + // 0.0842, -0.6646, 0.1454, -0.3020 + VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + 
0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); + asm volatile("vfmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.8706, -0.8232, -0.9497, -0.1062, -0.7168, 0.0955, + // -0.3606, 0.0521, -0.5142, -1.0498, -0.4783, -0.8901, + // 0.0842, 1.0234, 0.1454, 0.6904 + VCMP_U16(10, v2, 0x3af7, 0xba96, 0xbb99, 0xaecc, 0xb9bc, 0x2e1b, 0xb5c5, + 0x2aac, 0xb81d, 0xbc33, 0xb7a7, 0xbb1f, 0x2d63, 0x3c19, 0x30a7, + 0x3986); + + VSET(16, e32, m4); + double dscalar_32; + // -0.39704049 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbecb48e4); + // 0.43844241, 0.13734208, 0.15601240, 0.48965997, + // -0.41457745, -0.69918746, 0.38535324, 0.83301985, + // 0.79336989, -0.03326649, -0.85931808, 0.92554229, + // -0.77742523, 0.47821125, -0.53653014, -0.32442030 + VLOAD_32(v8, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, + 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, + 0xbea61a6b); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.73119336, 0.87333083, -0.16325396, -0.30275631, + // 0.34779423, 0.22721651, 0.47497734, -0.58483958, + // -0.24916913, 0.13750601, -0.99799657, 0.66137350, + // 0.58565408, 0.68887448, -0.74538875, 0.99311894 + VLOAD_32(v4, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, + 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, + 0x3f7e3d0b); + asm volatile("vfmsac.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.73119336, -0.92786121, -0.16325396, 0.10834149, + // 0.34779423, 0.05038923, 0.47497734, 0.25409698, + // -0.24916913, -0.12429786, -0.99799657, -1.02885127, + // 0.58565408, -0.87874371, -0.74538875, -0.86431098 + VCMP_U32(11, v4, 0xbf3b2f7d, 0xbf6d8850, 0xbe272c0c, 0x3ddde223, 0x3eb21216, + 0x3d4e64ec, 0x3ef3303b, 0x3e821900, 0xbe7f2631, 0xbdfe8fe1, + 0xbf7f7cb4, 0xbf83b166, 0x3f15ed6d, 0xbf60f559, 0xbf3ed1cc, + 0xbf5d437b); + + VSET(16, e64, m8); + double dscalar_64; + // 
0.0070730785066928 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); + // -0.3542445595658672, 0.5861662785402695, + // 0.5713440701195280, 0.2841717566030781, 0.4022451154567073, + // -0.9804628417449213, -0.9703836833708208, + // 0.2288593539727362, 0.1806576644288407, 0.4892172254017777, + // 0.9508074316559227, -0.9022151172016701, + // -0.7929839752648156, 0.5513143449560454, + // 0.4823446191982377, -0.7486658065787619 + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, + 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 0x3fc71fca543f2eb8, + 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, + 0xbfe9601fee00766c, 0x3fe1a45dfb2cdc2e, 0x3fdedebbf736e98c, + 0xbfe7f511fe5c74b0); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.7603855538897846, -0.0491604902215765, + // -0.3714656077097227, -0.6096204185796581, + // -0.2818689596683441, 0.3527700521309320, + // 0.1176602936422064, -0.2049443830034134, + // -0.6926950556538125, 0.7269529331298494, + // -0.2107692441818434, 0.1746722346734710, + // -0.5298547863982788, 0.2397543330794352, + // -0.8347981409736787, -0.6198539479673024 + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, + 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, + 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, + 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, + 0xbfe3d5d7f25a1d14); + asm volatile("vfmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.7603855538897846, 0.0533064903276678, + // -0.3714656077097227, 0.6116303877234964, + // -0.2818689596683441, -0.3597049427834890, 0.1176602936422064, + // 0.2065631231810536, -0.6926950556538125, -0.7234926612877562, + // -0.2107692441818434, -0.1810536730273635, + // -0.5298547863982788, -0.2358548434356951, + // -0.8347981409736787, 0.6145585759420944 + VCMP_U64(12, v8, 0xbfe8551415c9d6cc, 
0x3fab4afd013e6639, 0xbfd7c617af2cedf8, + 0x3fe39279e4106415, 0xbfd20a241ae21e00, 0xbfd70567e15dbc9b, + 0x3fbe1efc293b2500, 0x3fca70a9114fa5b7, 0xbfe62a8ed24449ee, + 0xbfe726da14e40fb7, 0xbfcafa7c9161bf78, 0xbfc72cc44a3c91ef, + 0xbfe0f4920666b5a4, 0xbfce307dd3946ada, 0xbfeab6aa9747cb24, + 0x3fe3aa76bf24b95e); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c new file mode 100644 index 000000000..d54334f0a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c @@ -0,0 +1,453 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, + // 0.0228, -0.5410, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, + // -0.6509, -0.3940 + VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, + 0xb854, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); + // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, + // 0.8330, -0.6147, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, + // 0.1646, 0.2693 + VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + 0xb8eb, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); + // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, + // -0.3608, 0.1119, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, + // -0.7690, 0.4216 + VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, + 0x2f29, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); + asm volatile("vfmsub.vv v2, v4, v6"); + 
// 0.7485, 0.4175, -0.2632, -0.2632, 0.5542, -1.0127, -0.5679, + // -0.8413, 0.5542, -0.6650, -0.6455, 0.2247, 0.5625, + // -0.5464, 0.3359, -0.4355 + VCMP_U16(1, v2, 0x39fd, 0x36ae, 0xb436, 0xb436, 0x386f, 0xbc0d, 0xb88b, + 0xbabb, 0x386f, 0xb952, 0xb92a, 0x3331, 0x3880, 0xb85f, 0x3560, + 0xb6f8); + + VSET(16, e32, m4); + // -0.74553698, -0.16736358, -0.11869104, -0.85860848, + // -0.66138542, -0.68386567, -0.45389724, -0.12761629, + // -0.95652348, 0.71083277, 0.24187960, 0.01609672, + // -0.58867335, -0.55222940, -0.67417240, -0.06725668 + VLOAD_32(v8, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, + 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, + 0xbd89bddf); + // -0.17500710, -0.81537211, -0.31956050, 0.22762603, + // 0.49659184, -0.09389434, 0.05757815, -0.13087828, + // -0.73042232, -0.79662275, -0.96801740, 0.03017101, + // 0.70759267, -0.35606241, 0.18037270, -0.35372722 + VLOAD_32(v12, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, + 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, + 0xbeb51bbc); + // 0.92876774, 0.18572871, -0.42147154, -0.79289448, + // 0.90907055, 0.07037155, 0.07339484, 0.17415307, + // -0.61978233, -0.04939311, 0.56138068, -0.51601994, + // -0.80625385, -0.31227911, 0.91474551, 0.78424871 + VLOAD_32(v4, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, + 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, + 0x3f48c486); + asm volatile("vfmsub.vv v4, v8, v12"); + // -0.51742357, 0.78428787, 0.36958539, 0.45315993, + // -1.09783781, 0.04576965, -0.09089187, + // 0.10865352, 1.32325864, 0.76151252, 1.10380387, + // -0.03847724, -0.23297250, 0.52851212, -0.79706889, 0.30098125 + VCMP_U32(2, v4, 0xbf0475df, 0x3f48c717, 0x3ebd3a4c, 0x3ee80493, 0xbf8c85f3, + 0x3d3b78f5, 0xbdba2584, 
0x3dde85bc, 0x3fa9608a, 0x3f42f27c, + 0x3f8d4973, 0xbd1d9a4f, 0xbe6e9058, 0x3f074c92, 0xbf4c0cb5, + 0x3e9a1a37); + + VSET(16, e64, m8); + // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, + // -0.7994160811423281, 0.0797802827518117, + // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, + // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, + // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, + // 0.5856279779979670, -0.5536419150604011 + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, + 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, + 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, + 0x3fef54e26439ed98, 0x3fe0758a283c1602, 0x3fe2bd76e2a3f6ca, + 0xbfe1b76f3fdc22ac); + // 0.4156163852505284, -0.7806302214299039, -0.8826873649954201, + // 0.4810449553239884, 0.9337837820126544, -0.5377837408558668, + // -0.1434453653318362, 0.1199087999382409, + // -0.2601268153647489, -0.1813009025048657, + // -0.2492371358416354, 0.4131695659117063, + // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, + // 0.1439804529607418 + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, + 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, + 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, + 0xbfed12ec99b26d4c, 0xbfd94b4f95947db0, 0x3fb4eea39ec7d8a0, + 0x3fc26df3945d6540); + // -0.2185765241217579, -0.9587275435281344, 0.1216903502931035, + // 0.7653655177934149, -0.5928258331230032, 0.9123074434439491, + // 0.1569052366565831, 0.2566745252901157, -0.9113595614847214, + // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, + // -0.6653994610877216, -0.6745212179353777, 0.8748797125997727, + // -0.7324641634418565 + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + 
0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, + 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, + 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, + 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, + 0xbfe77058af6f3156); + asm volatile("vfmsub.vv v8, v16, v24"); + // -0.4878417526056305, 1.5023792602532113, 0.7910431172705017, + // -1.0928904581998689, -0.9810795946017860, -0.0425694375227023, + // 0.1708735055334605, 0.0918950033157782, 0.1963653801662987, + // 0.0478398011349183, 0.1360200483560070, -0.5905179111943168, + // 0.2570595827304748, 0.0482829008439404, 0.4305876927582732, + // 0.2615424092003222 + VCMP_U64(3, v8, 0xbfdf38cc9d4420dd, 0x3ff809bed5cf9e94, 0x3fe95039a7cce2e7, + 0xbff17c7ab4814324, 0xbfef650108b2cdb1, 0xbfa5cba94bf030ac, + 0x3fc5df2edb027178, 0x3fb7866e51e83656, 0x3fc9228032f0c004, + 0x3fa87e755aa4ab1c, 0x3fc1691adda50ab1, 0xbfe2e585d18904b2, + 0x3fd073aa093cd9d6, 0x3fa8b88950295616, 0x3fdb8ebfae9d3d83, + 0x3fd0bd1c5f821364); +}; + +// Simple random test with similar values (masked) +// Same numbers as TEST_CASE1, except element 8 of the e16 operands +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, + // 0.0228, 0.7217, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, + // -0.6509, -0.3940 + VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, + 0x39c6, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); + // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, + // 0.8330, -0.8584, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, + // 0.1646, 0.2693 + VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + 0xbade, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, + // -0.3608, 0.3169, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, + // -0.7690, 0.4216 + VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 
0xb5c6, + 0x3512, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); + asm volatile("vfmsub.vv v2, v4, v6, v0.t"); + // -0.2292, 0.4175, -0.7427, -0.2632, 0.1119, -1.0127, -0.2983, + // -0.8413, 0.3169, -0.6650, -0.0629, 0.2247, -0.8638, -0.5464, + // -0.7690, -0.4355 + VCMP_U16(4, v2, 0xb356, 0x36ae, 0xb9f1, 0xb436, 0x2f29, 0xbc0d, 0xb4c6, + 0xbabb, 0x3512, 0xb952, 0xac06, 0x3331, 0xbae9, 0xb85f, 0xba27, + 0xb6f8); + + VSET(16, e32, m4); + // -0.74553698, -0.16736358, -0.11869104, -0.85860848, + // -0.66138542, -0.68386567, -0.45389724, -0.12761629, + // -0.95652348, 0.71083277, 0.24187960, 0.01609672, + // -0.58867335, -0.55222940, -0.67417240, -0.06725668 + VLOAD_32(v8, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, + 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, + 0xbd89bddf); + // -0.17500710, -0.81537211, -0.31956050, 0.22762603, + // 0.49659184, -0.09389434, 0.05757815, -0.13087828, + // -0.73042232, -0.79662275, -0.96801740, 0.03017101, + // 0.70759267, -0.35606241, 0.18037270, -0.35372722 + VLOAD_32(v12, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, + 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, + 0xbeb51bbc); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.92876774, 0.18572871, -0.42147154, -0.79289448, + // 0.90907055, 0.07037155, 0.07339484, 0.17415307, + // -0.61978233, -0.04939311, 0.56138068, -0.51601994, + // -0.80625385, -0.31227911, 0.91474551, 0.78424871 + VLOAD_32(v4, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, + 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, + 0x3f48c486); + asm volatile("vfmsub.vv v4, v8, v12, v0.t"); + // 0.92876774, 0.78428787, -0.42147154, 0.45315993, + // 0.90907055, 0.04576965, 0.07339484, 0.10865352, + // -0.61978233, 0.76151252, 0.56138068, -0.03847724, + // 
-0.80625385, 0.52851212, 0.91474551, 0.30098125 + VCMP_U32(5, v4, 0x3f6dc3b9, 0x3f48c717, 0xbed7cb1e, 0x3ee80493, 0x3f68b8d9, + 0x3d3b78f5, 0x3d965009, 0x3dde85bc, 0xbf1eaa0e, 0x3f42f27c, + 0x3f0fb6a5, 0xbd1d9a4f, 0xbf4e66a7, 0x3f074c92, 0x3f6a2cc3, + 0x3e9a1a37); + + VSET(16, e64, m8); + // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, + // -0.7994160811423281, 0.0797802827518117, + // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, + // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, + // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, + // 0.5856279779979670, -0.5536419150604011 + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, + 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, + 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, + 0x3fef54e26439ed98, 0x3fe0758a283c1602, 0x3fe2bd76e2a3f6ca, + 0xbfe1b76f3fdc22ac); + // 0.4156163852505284, -0.7806302214299039, -0.8826873649954201, + // 0.4810449553239884, 0.9337837820126544, -0.5377837408558668, + // -0.1434453653318362, 0.1199087999382409, + // -0.2601268153647489, -0.1813009025048657, + // -0.2492371358416354, 0.4131695659117063, + // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, + // 0.1439804529607418 + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, + 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, + 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, + 0xbfed12ec99b26d4c, 0xbfd94b4f95947db0, 0x3fb4eea39ec7d8a0, + 0x3fc26df3945d6540); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2185765241217579, -0.9587275435281344, 0.1216903502931035, + // 0.7653655177934149, -0.5928258331230032, 0.9123074434439491, + // 0.1569052366565831, 0.2566745252901157, -0.9113595614847214, + // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, + // 
-0.6653994610877216, -0.6745212179353777, 0.8748797125997727, + // -0.7324641634418565 + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + 0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, + 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, + 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, + 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, + 0xbfe77058af6f3156); + asm volatile("vfmsub.vv v8, v16, v24, v0.t"); + // -0.2185765241217579, 1.5023792602532113, 0.1216903502931035, + // -1.0928904581998689, -0.5928258331230032, -0.0425694375227023, + // 0.1569052366565831, 0.0918950033157782, -0.9113595614847214, + // 0.0478398011349183, 0.2337303194688813, -0.5905179111943168, + // -0.6653994610877216, 0.0482829008439404, 0.8748797125997727, + // 0.2615424092003222 + VCMP_U64(6, v8, 0xbfcbfa50c7635df8, 0x3ff809bed5cf9e94, 0x3fbf27194abf66e0, + 0xbff17c7ab4814324, 0xbfe2f86de1af9792, 0xbfa5cba94bf030ac, + 0x3fc4157886016dd8, 0x3fb7866e51e83656, 0xbfed29db86ef2934, + 0x3fa87e755aa4ab1c, 0x3fcdeae00719eac8, 0xbfe2e585d18904b2, + 0xbfe54af3cf84bab0, 0x3fa8b88950295616, 0x3febff03bd3198ce, + 0x3fd0bd1c5f821364); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1489 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x30c4); + // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, + // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, 0.7349, + // -0.8105, 0.0033 + VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); + // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, -0.2766, + // -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, -0.1974, 0.6416, + // 0.7109, 0.0817 + VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); + asm volatile("vfmsub.vf v2, %[A], 
v4" ::[A] "f"(dscalar_16)); + // -0.8877, 0.2925, 0.1824, 0.2477, -1.0205, -0.5278, -0.9121, + // 0.6372, -0.4492, 0.1824, -0.5854, -0.2888, -0.0847, -0.6392, + // 0.9165, 0.0089 + VCMP_U16(7, v2, 0xbb1a, 0x34ae, 0x31d6, 0x33ed, 0xbc15, 0xb839, 0xbb4c, + 0x3919, 0xb730, 0x31d6, 0xb8af, 0xb49f, 0xad6c, 0xb91d, 0x3b55, + 0x208b); + + VSET(16, e32, m4); + double dscalar_32; + // -0.12857932 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe03aa4c); + // 0.31050768, -0.13843875, 0.23405042, -0.30545133, + // -0.28880060, 0.46233574, -0.51105869, -0.11776974, + // -0.39969075, 0.51141965, 0.88750082, -0.22310242, + // 0.60111052, 0.58466393, -0.14306845, -0.01826003 + VLOAD_32(v8, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, + 0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, + 0xbc95960e); + // -0.51789892, 0.77328473, -0.88433731, 0.40865302, + // -0.50454420, 0.30827177, -0.25503114, 0.07736996, + // 0.20596179, -0.42633566, 0.89622146, 0.03779412, 0.50878429, + // 0.67896879, -0.17667305, 0.06984760 + VLOAD_32(v4, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, + 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, + 0x3d8f0c42); + asm volatile("vfmsub.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.24391660, 0.03901032, -0.12034293, 0.25290701, + // 0.35367453, -0.50197309, 0.54385042, 0.10782156, 0.37320831, + // -0.45660171, -1.00273633, 0.21824288, -0.66652966, + // -0.67196524, 0.16578496, 0.00927907 + VCMP_U32(8, v4, 0xbe79c546, 0x3d1fc94a, 0xbdf6765b, 0x3e817d07, 0x3eb514d5, + 0xbf00814f, 0x3f0b39c8, 0x3ddcd18d, 0x3ebf1529, 0xbee9c7b3, + 0xbf8059aa, 0x3e5f7b10, 0xbf2aa1b0, 0xbf2c05eb, 0x3e29c388, + 0x3c18073f); + + VSET(16, e64, m8); + double dscalar_64; + // -0.6953502965951812 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); + // -0.8873182146436771, 0.3913246153259273, 0.8941416868753180, + // 
0.2630283463166789, -0.8096670634564123, 0.6449486037845993, + // 0.8384405697279889, -0.9956067461953679, -0.8936777193492917, + // -0.4464070291333477, 0.5599998966835931, + // -0.3406088963725078, 0.4908382567748615, + // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, + 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, + 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, + 0x3fdf69e4dd1e50c8, 0xbfe09f2227f25264, 0x3fb7471ad038be10, + 0x3fe2dbe1da195142); + // -0.4387964890891065, -0.2425720412460179, + // -0.8909058709916624, -0.7961584351708695, 0.0353694444236163, + // 0.2992862865812480, -0.4186756300648600, -0.5421957392048740, + // 0.3780444269462682, -0.6731508364205383, 0.1263808806166760, + // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, + // -0.3756405874818076, -0.4529815298587780 + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, + 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, + 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, + 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, + 0xbfdcfda63e1bdf38); + asm volatile("vfmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // 1.1924354834767115, -0.2226520744998102, -0.2746500252428773, + // 0.2905806577161405, 0.7850729097860438, -0.8530574119257405, + // -0.5473143461852141, 1.3726227142641205, 0.6308044149460488, + // 0.9144826628916634, -0.6478788795043589, -0.2554319322783636, + // -0.8489261129378481, 0.2739582417788488, 0.1702730716940806, + // -0.2743602518995064 + VCMP_U64(9, v8, 0x3ff314373ac1f573, 0xbfcc7fdcf92e7eaa, 0xbfd193ddb310e0ff, + 0x3fd298df9d6f6c70, 0x3fe91f5139103634, 0xbfeb4c3f0eba9b49, + 0xbfe18399602fe862, 0x3ff5f6433c382dac, 0x3fe42f8cbd8bb3c3, + 0x3fed4371253c1e34, 
0xbfe4bb6c7ce7b0ea, 0xbfd058ff2cdf5691, + 0xbfeb2a6718793b11, 0x3fd18888263a3a6e, 0x3fc5cb820d286398, + 0xbfd18f1e4d4ec3d2); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1489 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x30c4); + // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, + // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, + // 0.7349, -0.8105, 0.0033 + VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, + // -0.2766, -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, + // -0.1974, 0.6416, 0.7109, 0.0817 + VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); + asm volatile("vfmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -0.2338, 0.2925, 0.0069, 0.2477, -0.1733, -0.5278, + // -0.2766, 0.6372, -0.1803, 0.1824, 0.7856, -0.2888, + // -0.1974, -0.6392, 0.7109, 0.0089 + VCMP_U16(10, v2, 0xb37b, 0x34ae, 0x1f06, 0x33ed, 0xb18c, 0xb839, 0xb46d, + 0x3919, 0xb1c5, 0x31d6, 0x3a49, 0xb49f, 0xb251, 0xb91d, 0x39b0, + 0x208b); + + VSET(16, e32, m4); + double dscalar_32; + // -0.12857932 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe03aa4c); + // 0.31050768, -0.13843875, 0.23405042, -0.30545133, + // -0.28880060, 0.46233574, -0.51105869, -0.11776974, + // -0.39969075, 0.51141965, 0.88750082, -0.22310242, + // 0.60111052, 0.58466393, -0.14306845, -0.01826003 + VLOAD_32(v8, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, + 0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, + 0xbc95960e); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.51789892, 0.77328473, -0.88433731, 0.40865302, + // -0.50454420, 0.30827177, -0.25503114, 0.07736996, + // 
0.20596179, -0.42633566, 0.89622146, 0.03779412, + // 0.50878429, 0.67896879, -0.17667305, 0.06984760 + VLOAD_32(v4, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, + 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, + 0x3d8f0c42); + asm volatile("vfmsub.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.51789892, 0.03901032, -0.88433731, 0.25290701, + // -0.50454420, -0.50197309, -0.25503114, 0.10782156, + // 0.20596179, -0.45660171, 0.89622146, 0.21824288, + // 0.50878429, -0.67196524, -0.17667305, 0.00927907 + VCMP_U32(11, v4, 0xbf049506, 0x3d1fc94a, 0xbf6263ee, 0x3e817d07, 0xbf0129cf, + 0xbf00814f, 0xbe829371, 0x3ddcd18d, 0x3e52e7a6, 0xbee9c7b3, + 0x3f656ec5, 0x3e5f7b10, 0x3f023fb0, 0xbf2c05eb, 0xbe34e9c8, + 0x3c18073f); + + VSET(16, e64, m8); + double dscalar_64; + // -0.6953502965951812 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); + // -0.8873182146436771, 0.3913246153259273, + // 0.8941416868753180, 0.2630283463166789, + // -0.8096670634564123, 0.6449486037845993, + // 0.8384405697279889, -0.9956067461953679, + // -0.8936777193492917, -0.4464070291333477, + // 0.5599998966835931, -0.3406088963725078, 0.4908382567748615, + // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, + 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, + 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, + 0x3fdf69e4dd1e50c8, 0xbfe09f2227f25264, 0x3fb7471ad038be10, + 0x3fe2dbe1da195142); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.4387964890891065, -0.2425720412460179, + // -0.8909058709916624, -0.7961584351708695, + // 0.0353694444236163, 0.2992862865812480, + // -0.4186756300648600, -0.5421957392048740, + // 0.3780444269462682, -0.6731508364205383, 0.1263808806166760, + // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, 
+ // -0.3756405874818076, -0.4529815298587780 + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, + 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, + 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, + 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, + 0xbfdcfda63e1bdf38); + asm volatile("vfmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.4387964890891065, -0.2226520744998102, + // -0.8909058709916624, 0.2905806577161405, 0.0353694444236163, + // -0.8530574119257405, + // -0.4186756300648600, 1.3726227142641205, 0.3780444269462682, + // 0.9144826628916634, 0.1263808806166760, -0.2554319322783636, + // 0.5149747658358419, 0.2739582417788488, -0.3756405874818076, + // -0.2743602518995064 + VCMP_U64(12, v8, 0xbfdc153dde8f3078, 0xbfcc7fdcf92e7eaa, 0xbfec824d0777279c, + 0x3fd298df9d6f6c70, 0x3fa21bf19e277c80, 0xbfeb4c3f0eba9b49, + 0xbfdacb94deb0b06c, 0x3ff5f6433c382dac, 0x3fd831e1408ad588, + 0x3fed4371253c1e34, 0x3fc02d3faa8b4d88, 0xbfd058ff2cdf5691, + 0x3fe07aac5c30f764, 0x3fd18888263a3a6e, 0xbfd80a7ed19236bc, + 0xbfd18f1e4d4ec3d2); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c new file mode 100644 index 000000000..21f397e90 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c @@ -0,0 +1,350 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, + // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, + // 0.7446, 0.3909 + VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + 0x35a0, 0xb4d7, 0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); + // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, + // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, + // -0.4922, 0.4456, 0.2050 + VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); + asm volatile("vfmul.vv v2, v4, v6"); + // -0.4553, 0.0226, -0.2466, -0.3950, -0.3577, 0.0657, 0.0533, + // -0.5171, 0.1718, 0.0829, -0.1873, -0.3159, -0.7852, -0.4131, + // 0.3318, 0.0801 + VCMP_U16(1, v2, 0xb749, 0x25cb, 0xb3e4, 0xb652, 0xb5b9, 0x2c35, 0x2ad2, + 0xb823, 0x317f, 0x2d4e, 0xb1fe, 0xb50e, 0xba48, 0xb69c, 0x354f, + 0x2d21); + + VSET(16, e32, m4); + // 0.48805356, 0.30350628, -0.10483003, 0.61108905, + // -0.09161828, 0.83353645, -0.55006021, -0.78635991, + // 0.49253011, -0.03583150, -0.77662903, 0.57397723, + // -0.54674339, 0.86299890, 0.65402901, -0.16832402 + VLOAD_32(v8, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, + 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, + 0xbe2c5d22); + // 0.87142652, -0.32756421, 0.76706660, -0.54420376, + // -0.99424285, 0.31885657, 0.18092929, -0.68290263, + // 0.45391774, -0.45151946, -0.08929581, 0.80524033, + // 0.81978256, -0.28325567, -0.53026456, -0.21847765 + VLOAD_32(v12, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, + 
0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, + 0xbe5fb89b); + asm volatile("vfmul.vv v4, v8, v12"); + // 0.42530280, -0.09941780, -0.08041162, -0.33255696, + // 0.09109081, 0.26577857, -0.09952200, 0.53700727, + // 0.22356816, 0.01617862, 0.06934972, 0.46218961, + // -0.44821069, -0.24444933, -0.34680840, 0.03677504 + VCMP_U32(2, v4, 0x3ed9c14a, 0xbdcb9b8f, 0xbda4aed9, 0xbeaa44e8, 0x3dba8dd2, + 0x3e881421, 0xbdcbd231, 0x3f09794f, 0x3e64ef0d, 0x3c848907, + 0x3d8e073a, 0x3eeca41e, 0xbee57bdf, 0xbe7a50ed, 0xbeb190df, + 0x3d16a16c); + + VSET(16, e64, m8); + // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, + // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, + // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, + // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, + // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, + // 0.1487690137320270 + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, + 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, + 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, + 0xbfdde5e83ebf4f58, 0x3fd3dfdd2d3a1b90, 0x3fc5cdaba8c776a8, + 0x3fc30adcf05190c8); + // 0.6932733143704406, -0.2687556191190688, 0.2528829246597466, + // 0.7287253758892476, -0.5682564905667424, 0.0092122398882537, + // -0.5132517188156311, -0.0178020357545405, 0.0816988280997786, + // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, + // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, + // -0.4586515678904381 + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, + 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, + 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, + 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, + 0xbfdd5a8c1b164ebc); + asm 
volatile("vfmul.vv v8, v16, v24"); + // -0.5195315511948315, -0.1888427270075288, 0.1637593235041994, + // 0.0561991250128884, 0.1272151431765869, 0.0082893703931556, + // 0.4848274962501199, -0.0153039296644220, -0.0020785594718451, + // 0.1237512938975817, -0.1713143809148648, -0.1725960035025313, + // 0.4246315573217200, 0.0495488546564072, 0.0360007188479938, + // -0.0682331414017083 + VCMP_U64(3, v8, 0xbfe0a000a1b3e706, 0xbfc82bff9c4ada77, 0x3fc4f610c56ecca8, + 0x3facc621b7fd0401, 0x3fc04895f7bfec49, 0x3f80fa0475f1bbe1, + 0x3fdf0769e826220a, 0xbf8f57aaab459580, 0xbf61070e1e8a29ae, + 0x3fbfae2a3020b759, 0xbfc5eda12fae9203, 0xbfc617a0373b59a7, + 0x3fdb2d29d6e2f72e, 0x3fa95e77ac9b67ce, 0x3fa26eafac2b53dd, + 0xbfb177ba26d2dcbe); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, + // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, + // 0.7446, 0.3909 + VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + 0x35a0, 0xb4d7, 0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); + // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, + // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, + // -0.4922, 0.4456, 0.2050 + VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmul.vv v2, v4, v6, v0.t"); + // 0.0000, 0.0226, 0.0000, -0.3950, 0.0000, 0.0657, 0.0000, + // -0.5171, 0.0000, 0.0829, 0.0000, -0.3159, 0.0000, + // -0.4131, 0.0000, 0.0801 + VCMP_U16(4, v2, 0x0, 0x25cb, 0x0, 0xb652, 0x0, 0x2c35, 0x0, 0xb823, 0x0, + 0x2d4e, 0x0, 0xb50e, 0x0, 0xb69c, 0x0, 0x2d21); + + VSET(16, e32, m4); + // 0.48805356, 0.30350628, -0.10483003, 0.61108905, + // -0.09161828, 0.83353645, -0.55006021, -0.78635991, + // 0.49253011, 
-0.03583150, -0.77662903, 0.57397723, + // -0.54674339, 0.86299890, 0.65402901, -0.16832402 + VLOAD_32(v8, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, + 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, + 0xbe2c5d22); + // 0.87142652, -0.32756421, 0.76706660, -0.54420376, + // -0.99424285, 0.31885657, 0.18092929, -0.68290263, + // 0.45391774, -0.45151946, -0.08929581, 0.80524033, + // 0.81978256, -0.28325567, -0.53026456, -0.21847765 + VLOAD_32(v12, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, + 0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, + 0xbe5fb89b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmul.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.09941780, 0.00000000, -0.33255696, + // 0.00000000, 0.26577857, 0.00000000, 0.53700727, + // 0.00000000, 0.01617862, 0.00000000, 0.46218961, + // 0.00000000, -0.24444933, 0.00000000, 0.03677504 + VCMP_U32(5, v4, 0x0, 0xbdcb9b8f, 0x0, 0xbeaa44e8, 0x0, 0x3e881421, 0x0, + 0x3f09794f, 0x0, 0x3c848907, 0x0, 0x3eeca41e, 0x0, 0xbe7a50ed, 0x0, + 0x3d16a16c); + + VSET(16, e64, m8); + // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, + // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, + // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, + // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, + // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, + // 0.1487690137320270 + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, + 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, + 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, + 0xbfdde5e83ebf4f58, 0x3fd3dfdd2d3a1b90, 0x3fc5cdaba8c776a8, + 0x3fc30adcf05190c8); + // 0.6932733143704406, -0.2687556191190688, 0.2528829246597466, + // 
0.7287253758892476, -0.5682564905667424, 0.0092122398882537, + // -0.5132517188156311, -0.0178020357545405, 0.0816988280997786, + // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, + // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, + // -0.4586515678904381 + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, + 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, + 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, + 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, + 0xbfdd5a8c1b164ebc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmul.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.1888427270075288, 0.0000000000000000, + // 0.0561991250128884, 0.0000000000000000, 0.0082893703931556, + // 0.0000000000000000, -0.0153039296644220, 0.0000000000000000, + // 0.1237512938975817, 0.0000000000000000, -0.1725960035025313, + // 0.0000000000000000, 0.0495488546564072, 0.0000000000000000, + // -0.0682331414017083 + VCMP_U64(6, v8, 0x0, 0xbfc82bff9c4ada77, 0x0, 0x3facc621b7fd0401, 0x0, + 0x3f80fa0475f1bbe1, 0x0, 0xbf8f57aaab459580, 0x0, 0x3fbfae2a3020b759, + 0x0, 0xbfc617a0373b59a7, 0x0, 0x3fa95e77ac9b67ce, 0x0, + 0xbfb177ba26d2dcbe); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, -0.4636, -0.4797, + // -0.5903, 0.2959, 0.4685, -0.3660, 0.3167, -0.9766, 0.0052, + // -0.6489, -0.0474 + VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); + double dscalar_16; + // 0.2971 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34c1); + asm volatile("vfmul.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.2502, 0.2832, 0.1155, -0.1100, -0.2891, -0.1377, -0.1426, + // -0.1754, 0.0879, 0.1392, -0.1088, 0.0941, -0.2900, 0.0015, + // -0.1927, -0.0141 + 
VCMP_U16(7, v2, 0xb401, 0x3488, 0x2f65, 0xaf0b, 0xb4a0, 0xb068, 0xb090, + 0xb19d, 0x2da0, 0x3074, 0xaef6, 0x2e05, 0xb4a4, 0x1647, 0xb22b, + 0xa336); + + VSET(16, e32, m4); + // -0.11454447, -0.46133029, 0.06972761, 0.20429718, + // -0.97134608, -0.95719630, -0.11250938, 0.48455358, + // 0.59656250, 0.46462929, 0.13447689, -0.32035729, 0.75118428, + // 0.90634471, 0.73552424, -0.53555632 + VLOAD_32(v8, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, + 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, + 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, + 0xbf091a38); + double dscalar_32; + // 0.94017404 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f70af3f); + asm volatile("vfmul.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.10769174, -0.43373078, 0.06555609, 0.19207491, + // -0.91323435, -0.89993113, -0.10577840, 0.45556471, + // 0.56087255, 0.43683240, 0.12643167, -0.30119160, 0.70624399, + // 0.85212177, 0.69152081, -0.50351614 + VCMP_U32(8, v4, 0xbddc8d7d, 0xbede11f6, 0x3d864246, 0x3e44af49, 0xbf69c9ba, + 0xbf6661e3, 0xbdd8a259, 0x3ee93fc7, 0x3f0f9558, 0x3edfa87f, + 0x3e01774e, 0xbe9a35c9, 0x3f34cc68, 0x3f5a24a7, 0x3f310782, + 0xbf00e66f); + + VSET(16, e64, m8); + // -0.3344965024132001, -0.2497404698970234, 0.3402338726452623, + // -0.5885400342262450, -0.7135559920290824, 0.1114442794173345, + // -0.9541638058007114, 0.1021679621951177, + // -0.1364702451627324, -0.9351295729000717, + // -0.2701320849999789, 0.3582375365191053, + // -0.6137661452178358, 0.6195430637830983, 0.2731869234335833, + // -0.4075196944877124 + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, + 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, + 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, + 0xbfe3a3f8e623486e, 0x3fe3d34bf9ad2f82, 0x3fd17be50175e4e8, + 0xbfda14cd7c133da0); + double dscalar_64; + // -0.7970907277742201 + 
BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); + asm volatile("vfmul.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.2666240605464688, 0.1990658129048941, -0.2711972651602534, + // 0.4691198042056620, 0.5687688649941168, -0.0888312017870367, + // 0.7605551223815086, -0.0814371353413154, 0.1087791670362886, + // 0.7453831118261137, 0.2153197802278006, -0.2855478187000574, + // 0.4892273033748624, -0.4938320315983399, -0.2177547636180751, + // 0.3248301698615385 + VCMP_U64(9, v8, 0x3fd1105e5d17ec76, 0x3fc97afd1216ce6e, 0xbfd15b4bc6282ffc, + 0x3fde060f123e080e, 0x3fe2335ac3443fa9, 0xbfb6bda4428a29bb, + 0x3fe85677b22de228, 0xbfb4d91068f88b49, 0x3fbbd8f394e82fe7, + 0x3fe7da2daf091575, 0x3fcb8f993b2151e0, 0xbfd2466a5bb0b251, + 0x3fdf4f8009138a1b, 0xbfdf9af1aa5ba7aa, 0xbfcbdf635a24d80a, + 0x3fd4ca047b13cdbf); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, -0.4636, + // -0.4797, -0.5903, 0.2959, 0.4685, -0.3660, 0.3167, + // -0.9766, 0.0052, -0.6489, -0.0474 + VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); + double dscalar_16; + // 0.2971 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34c1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmul.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2832, 0.0000, -0.1100, 0.0000, -0.1377, 0.0000, + // -0.1754, 0.0000, 0.1392, 0.0000, 0.0941, 0.0000, + // 0.0015, 0.0000, -0.0141 + VCMP_U16(10, v2, 0x0, 0x3488, 0x0, 0xaf0b, 0x0, 0xb068, 0x0, 0xb19d, 0x0, + 0x3074, 0x0, 0x2e05, 0x0, 0x1647, 0x0, 0xa336); + + VSET(16, e32, m4); + // -0.11454447, -0.46133029, 0.06972761, 0.20429718, + // -0.97134608, -0.95719630, -0.11250938, 0.48455358, + // 0.59656250, 0.46462929, 0.13447689, -0.32035729, + // 0.75118428, 0.90634471, 0.73552424, -0.53555632 + VLOAD_32(v8, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 
0x3e513348, 0xbf78aa23, + 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, + 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, + 0xbf091a38); + double dscalar_32; + // 0.94017404 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f70af3f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmul.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.43373078, 0.00000000, 0.19207491, + // 0.00000000, -0.89993113, 0.00000000, 0.45556471, + // 0.00000000, 0.43683240, 0.00000000, -0.30119160, + // 0.00000000, 0.85212177, 0.00000000, -0.50351614 + VCMP_U32(11, v4, 0x0, 0xbede11f6, 0x0, 0x3e44af49, 0x0, 0xbf6661e3, 0x0, + 0x3ee93fc7, 0x0, 0x3edfa87f, 0x0, 0xbe9a35c9, 0x0, 0x3f5a24a7, 0x0, + 0xbf00e66f); + + VSET(16, e64, m8); + // -0.3344965024132001, -0.2497404698970234, + // 0.3402338726452623, -0.5885400342262450, + // -0.7135559920290824, 0.1114442794173345, + // -0.9541638058007114, 0.1021679621951177, + // -0.1364702451627324, -0.9351295729000717, + // -0.2701320849999789, 0.3582375365191053, + // -0.6137661452178358, 0.6195430637830983, + // 0.2731869234335833, -0.4075196944877124 + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, + 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, + 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, + 0xbfe3a3f8e623486e, 0x3fe3d34bf9ad2f82, 0x3fd17be50175e4e8, + 0xbfda14cd7c133da0); + double dscalar_64; + // -0.7970907277742201 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmul.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.1990658129048941, 0.0000000000000000, + // 0.4691198042056620, 0.0000000000000000, + // -0.0888312017870367, 0.0000000000000000, + // -0.0814371353413154, 0.0000000000000000, + // 0.7453831118261137, 0.0000000000000000, + // -0.2855478187000574, 0.0000000000000000, + // 
-0.4938320315983399, 0.0000000000000000, 0.3248301698615385 + VCMP_U64(12, v8, 0x0, 0x3fc97afd1216ce6e, 0x0, 0x3fde060f123e080e, 0x0, + 0xbfb6bda4428a29bb, 0x0, 0xbfb4d91068f88b49, 0x0, 0x3fe7da2daf091575, + 0x0, 0xbfd2466a5bb0b251, 0x0, 0xbfdf9af1aa5ba7aa, 0x0, + 0x3fd4ca047b13cdbf); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c new file mode 100644 index 000000000..942be7ad3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VCLEAR(v2); + asm volatile("vfmv.v.f v2, %[A]" ::[A] "f"(dscalar_16)); + // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, + // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, + // -0.9380, -0.9380 + VCMP_U16(1, v2, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, + 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, + 0xbb81); + + VSET(16, e32, m4); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VCLEAR(v4); + asm volatile("vfmv.v.f v4, %[A]" ::[A] "f"(dscalar_32)); + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187 + VCMP_U32(2, v4, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, + 0xbf75e762, 0xbf75e762, 0xbf75e762, 
0xbf75e762, 0xbf75e762, + 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, + 0xbf75e762); + + VSET(16, e64, m8); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VCLEAR(v8); + asm volatile("vfmv.v.f v8, %[A]" ::[A] "f"(dscalar_64)); + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378 + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c new file mode 100644 index 000000000..e48726a90 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c @@ -0,0 +1,90 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +double scalar_16b; +float scalar_32b; +double scalar_64b; + +void TEST_CASE1() { // vfmv.f.s must return element 0 of the source vector at SEW = 16, 32 and 64 + BOX_HALF_IN_DOUBLE(scalar_16b, 0); + VSET(16, e16, m2); + VLOAD_16(v2, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + asm volatile("vfmv.f.s %0, v2" : "=f"(scalar_16b)); + XCMP(1, *((uint16_t *)&scalar_16b), 0xbb1e); + + scalar_32b = 0; + VSET(16, e32, m4); + VLOAD_32(v4, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + asm volatile("vfmv.f.s %0, v4" : "=f"(scalar_32b)); + XCMP(2, *((uint32_t *)&scalar_32b), 0xbe9451b0); + + scalar_64b = 0; + VSET(16, e64, m8); + VLOAD_64(v8, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + asm volatile("vfmv.f.s %0, v8" : "=f"(scalar_64b)); + XCMP(3, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); +} + +// Check special cases: element 0 of v1 must still be returned when vtype selects LMUL = 8 and after VSET_ZERO (presumably vl = 0 - confirm in vector_macros.h) +void TEST_CASE2() { + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET(16, e64, m8); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(4, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 
0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET_ZERO(e64, m1); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(5, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET_ZERO(e64, m8); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(6, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); // NOTE(review): vfmul.c and vfmv.c in this patch also call enable_fp() - confirm it is not required here + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c new file mode 100644 index 000000000..5a751ede6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c @@ -0,0 +1,69 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +double scalar_16b; +float scalar_32b; +double scalar_64b; + +void TEST_CASE1() { // vfmv.s.f at SEW = 16, 32 and 64: v1 is checked against the raw bits of the scalar operand + BOX_HALF_IN_DOUBLE(scalar_16b, 0xbb1e); + VSET(16, e16, m1); + VLOAD_16(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_16b)); + VCMP_U16(1, v1, *((uint16_t *)&scalar_16b)); + + scalar_32b = 0xbe9451b0; // NOTE(review): integer-to-float value conversion, not a bit pattern (vfmvfs.c in this patch uses 0xbe9451b0 as raw float bits); the check below stays self-consistent - confirm intent + VSET(16, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_32b)); + VCMP_U32(2, v1, *((uint32_t *)&scalar_32b)); + + scalar_64b = 0xbfe8d9d3f67536d2; // NOTE(review): converted by value to double as well, not installed as raw bits - confirm intent + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VCMP_U64(3, v1, *((uint64_t *)&scalar_64b)); +} + +// Check special cases: with LMUL = 8 in vtype the scalar must still land in v1, and after VSET_ZERO v1 must keep the 1 loaded before (presumably vl = 0 - confirm in vector_macros.h) +void TEST_CASE2() { + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(4, v1, *((uint64_t *)&scalar_64b)); + + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(5, v1, 1); + + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(6, v1, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); // NOTE(review): vfmul.c and vfmv.c in this patch also call enable_fp() - confirm it is not required here + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c new file mode 100644 index 000000000..df1148f2f --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c @@ -0,0 +1,793 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +///////////////// +// vfncvt.xu.f // +///////////////// + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 9165.669, 5488.131, 80154.047, -5022.386, 7163.093, + // -6826.076, -6976.746, 2675.899, 9587.624, -3671.810, + // 3611.960, -9086.531, -5333.617, -3284.205, 5676.141, + // -8293.472 + VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0x479c8d06, 0xc59cf316, 0x45dfd8be, + 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, + 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, + 0xc60195e3); + asm volatile("vfncvt.xu.f.w v8, v4"); + // 9166, 5488, 65535, 0, 7163, 0, + // 0, 2676, 9588, 0, 3612, 0, 0, + // 0, 5676, 0 + VCMP_U16(1, v8, 0x23ce, 0x1570, 0xffff, 0x0000, 0x1bfb, 0x0000, 0x0000, + 0x0a74, 0x2574, 0x0000, 0x0e1c, 0x0000, 0x0000, 0x0000, 0x162c, + 0x0000); + + VSET(16, e32, m4); + // -3508862.563, 1678202.418, -799491.756, 1707676.429, + // -5056868.769, 4282070.604, 458667.918, 8393053.957, + // -4485003.775, -5016427.098, -9086965.507, -6796529.257, + // -7756776.890, -1173384.460, 4850684.145, 8658279.578 + VLOAD_64(v8, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, + 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, + 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, + 0xc15d96fa38fb0400, 0xc131e78875bc4ace, 0x415280ff09493a97, + 0x416083acf280b61e); + asm volatile("vfncvt.xu.f.w v16, v8"); + // 0, 1678202, 0, 1707676, 0, + // 4282071, 458668, 8393054, 0, 0, 0, + // 0, 0, 0, 4850684, + // 8658280 + VCMP_U32(2, v16, 0x00000000, 0x00199b7a, 0x00000000, 0x001a0e9c, 
0x00000000, + 0x004156d7, 0x0006ffac, 0x0080115e, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x004a03fc, + 0x00841d68); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 9165.669, 5488.131, -1648.302, -5022.386, + // 7163.093, -6826.076, -6976.746, 2675.899, 9587.624, + // -3671.810, 3611.960, -9086.531, -5333.617, -3284.205, + // 5676.141, -8293.472 + VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0xc4ce09ad, 0xc59cf316, 0x45dfd8be, + 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, + 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, + 0xc60195e3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.xu.f.w v8, v4, v0.t"); + // 0, 5488, 0, 0, 0, 0, + // 0, 2676, 0, 0, 0, 0, + // 0, 0, 0, 0 + VCMP_U16(3, v8, 0x0000, 0x1570, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0a74, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -3508862.563, 1678202.418, -799491.756, + // 1707676.429, -5056868.769, 4282070.604, + // 458667.918, 8393053.957, -4485003.775, + // -5016427.098, -9086965.507, -6796529.257, + // -7756776.890, -1173384.460, 4850684.145, + // 8658279.578 + VLOAD_64(v8, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, + 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, + 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, + 0xc15d96fa38fb0400, 0xc131e78875bc4ace, 0x415280ff09493a97, + 0x416083acf280b61e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.xu.f.w v16, v8, v0.t"); + // 0, 1678202, 0, 1707676, 0, + // 4282071, 0, 8393054, 0, 0, 0, + // 0, 0, 0, 0, 8658280 + VCMP_U32(4, v16, 0x00000000, 0x00199b7a, 0x00000000, 0x001a0e9c, 0x00000000, + 0x004156d7, 0x00000000, 0x0080115e, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00841d68); +}; + +//////////////// +// vfncvt.x.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -7808.056, 9317.408, 1685.891, 3975.596, -5978.108, + // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, + // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, + // -5196.684 + VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, + 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, + 0xc5a26578); + asm volatile("vfncvt.x.f.w v8, v4"); + // -7808, 9317, 1686, 3976, -5978, + // 9676, 6964, 3590, -4335, -3261, + // -2340, 6085, 4043, 2828, 4389, + // -5197 + VCMP_U16(5, v8, 0xe180, 0x2465, 0x0696, 0x0f88, 0xe8a6, 0x25cc, 0x1b34, + 0x0e06, 0xef11, 0xf343, 0xf6dc, 0x17c5, 0x0fcb, 0x0b0c, 0x1125, + 0xebb3); + + VSET(16, e32, m4); + // 5365665.770, -7563846.858, 8056193.411, -2468299.255, + // -9624608.750, -6974543.165, 5868078.422, -5387798.170, + // 3847378.080, 1368753.124, 4380497.931, -8044304.268, + // 1687738.849, 3753399.509, -3684410.483, -7416477.444 + VLOAD_64(v8, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + 0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, + 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, + 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, + 0x4139c0bad971859a, 0x414ca2dbc1288a12, 0xc14c1c1d3dcd1b39, + 0xc15c4aa75c6c5635); + asm volatile("vfncvt.x.f.w v16, v8"); + // 5365666, -7563847, 8056193, -2468299, + // -9624609, -6974543, 5868078, -5387798, + // 3847378, 1368753, 4380498, -8044304, + // 1687739, 3753400, -3684410, -7416477 + VCMP_U32(6, v16, 0x0051dfa2, 0xff8c95b9, 0x007aed81, 0xffda5635, 0xff6d23df, + 0xff9593b1, 0x00598a2e, 0xffadc9ea, 0x003ab4d2, 0x0014e2b1, + 0x0042d752, 0xff8540f0, 0x0019c0bb, 0x003945b8, 0xffc7c7c6, + 0xff8ed563); +}; + +// Simple random test with similar values 
(masked) +// The numbers are the same as TEST_CASE3 +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -7808.056, 9317.408, 1685.891, 3975.596, -5978.108, + // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, + // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, + // -5196.684 + VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, + 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, + 0xc5a26578); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.x.f.w v8, v4, v0.t"); + // 0, 9317, 0, 3976, 0, + // 9676, 0, 3590, 0, -3261, + // 0, 6085, 0, 2828, 0, + // -5197 + VCMP_U16(7, v8, 0x0000, 0x2465, 0x0000, 0x0f88, 0x0000, 0x25cc, 0x0000, + 0x0e06, 0x0000, 0xf343, 0x0000, 0x17c5, 0x0000, 0x0b0c, 0x0000, + 0xebb3); + + VSET(16, e32, m4); + // 5365665.770, -7563846.858, 8056193.411, -2468299.255, + // -9624608.750, -6974543.165, 5868078.422, -5387798.170, + // 3847378.080, 1368753.124, 4380497.931, -8044304.268, + // 1687738.849, 3753399.509, -3684410.483, -7416477.444 + VLOAD_64(v8, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + 0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, + 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, + 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, + 0x4139c0bad971859a, 0x414ca2dbc1288a12, 0xc14c1c1d3dcd1b39, + 0xc15c4aa75c6c5635); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.x.f.w v16, v8, v0.t"); + // 0, -7563847, 0, -2468299, 0, + // -6974543, 0, -5387798, 0, + // 1368753, 0, -8044304, 0, + // 3753400, 0, -7416477 + VCMP_U32(8, v16, 0x00000000, 0xff8c95b9, 0x00000000, 0xffda5635, 0x00000000, + 0xff9593b1, 0x00000000, 0xffadc9ea, 0x00000000, 0x0014e2b1, + 0x00000000, 0xff8540f0, 0x00000000, 0x003945b8, 0x00000000, + 0xff8ed563); +}; + +///////////////////// +// vfncvt.rtz.xu.f // +///////////////////// + +// Simple random test with 
similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -9750.252, -4363.736, -2345.615, 6996.062, -7115.004, + // 6670.171, -4079.234, -1773.082, 254.350, 53.058, + // -9041.926, -8137.022, 1522.146, 198.516, -920.430, + // 2857.583 + VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, + 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, + 0x45329953); + asm volatile("vfncvt.rtz.xu.f.w v8, v4"); + // 0, 0, 0, 6996, 0, + // 6670, 0, 0, 254, 53, + // 0, 0, 1522, 198, 0, + // 2857 + VCMP_U16(9, v8, 0x0000, 0x0000, 0x0000, 0x1b54, 0x0000, 0x1a0e, 0x0000, + 0x0000, 0x00fe, 0x0035, 0x0000, 0x0000, 0x05f2, 0x00c6, 0x0000, + 0x0b29); + + VSET(16, e32, m4); + // -8404683.758, 3627605.540, -4368861.865, -2883871.623, + // 5750957.328, -7243911.338, -8202847.045, 5348152.868, + // 9957770.965, 8018962.598, -8478197.842, -9780786.953, + // 184470.081, 250336.923, -6517203.475, -7691903.192 + VLOAD_64(v8, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, + 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, + 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, + 0x410684b0a4ee482d, 0x410e8f07623ffd06, 0xc158dc74de617fbc, + 0xc15d579fcc41ba16); + asm volatile("vfncvt.rtz.xu.f.w v16, v8"); + // 0, 3627605, 0, 0, 5750957, 0, + // 0, 5348152, 9957770, 8018962, 0, 0, + // 184470, 250336, 0, 0 + VCMP_U32(10, v16, 0x00000000, 0x00375a55, 0x00000000, 0x00000000, 0x0057c0ad, + 0x00000000, 0x00000000, 0x00519b38, 0x0097f18a, 0x007a5c12, + 0x00000000, 0x00000000, 0x0002d096, 0x0003d1e0, 0x00000000, + 0x00000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -9750.252, -4363.736, -2345.615, 6996.062, -7115.004, + // 6670.171, -4079.234, -1773.082, 254.350, 53.058, + // -9041.926, -8137.022, 
1522.146, 198.516, -920.430, + // 2857.583 + VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, + 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, + 0x45329953); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rtz.xu.f.w v8, v4, v0.t"); + // 0, 0, 0, 6996, 0, + // 6670, 0, 0, 0, 53, + // 0, 0, 0, 198, 0, + // 2857 + VCMP_U16(11, v8, 0x0000, 0x0000, 0x0000, 0x1b54, 0x0000, 0x1a0e, 0x0000, + 0x0000, 0x0000, 0x0035, 0x0000, 0x0000, 0x0000, 0x00c6, 0x0000, + 0x0b29); + + VSET(16, e32, m4); + // -8404683.758, 3627605.540, -4368861.865, -2883871.623, + // 5750957.328, -7243911.338, -8202847.045, 5348152.868, + // 9957770.965, 8018962.598, -8478197.842, -9780786.953, + // 184470.081, 250336.923, -6517203.475, -7691903.192 + VLOAD_64(v8, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, + 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, + 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, + 0x410684b0a4ee482d, 0x410e8f07623ffd06, 0xc158dc74de617fbc, + 0xc15d579fcc41ba16); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rtz.xu.f.w v16, v8, v0.t"); + // 0, 3627605, 0, 0, 0, 0, 0, + // 5348152, 0, 8018962, 0, 0, 0, + // 250336, 0, 0 + VCMP_U32(12, v16, 0x00000000, 0x00375a55, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00519b38, 0x00000000, 0x007a5c12, + 0x00000000, 0x00000000, 0x00000000, 0x0003d1e0, 0x00000000, + 0x00000000); +}; + +//////////////////// +// vfncvt.rtz.x.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 9352.418, -5719.459, 4617.815, -3012.009, -3597.063, + // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, + // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, + // 4727.792 + VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 
0x45904e86, 0xc53c4026, 0xc560d104, + 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, + 0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, + 0x4593be56); + asm volatile("vfncvt.rtz.x.f.w v8, v4"); + // 9352, -5719, 4617, -3012, -3597, + // -5717, -3327, 1286, 1797, 3842, + // -2148, -7283, 8783, -7958, -6728, + // 4727 + VCMP_U16(13, v8, 0x2488, 0xe9a9, 0x1209, 0xf43c, 0xf1f3, 0xe9ab, 0xf301, + 0x0506, 0x0705, 0x0f02, 0xf79c, 0xe38d, 0x224f, 0xe0ea, 0xe5b8, + 0x1277); + + VSET(16, e32, m4); + // 1563546.261, -1988965.594, 6496092.888, 5054778.769, + // 9551708.952, -336377.787, -2352111.643, 4412162.570, + // 7087155.475, 338850.875, 2765611.498, 2723631.912, + // -3252079.308, 1096915.326, 5492109.280, -7265880.245 + VLOAD_64(v8, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, + 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, + 0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, + 0xc148cfb7a76dea0f, 0x4130bcd353667e5d, 0x4154f36351f3a3c5, + 0xc15bb7960fb007a5); + asm volatile("vfncvt.rtz.x.f.w v16, v8"); + // 1563546, -1988965, 6496092, 5054778, + // 9551708, -336377, -2352111, 4412162, + // 7087155, 338850, 2765611, 2723631, + // -3252079, 1096915, 5492109, -7265880 + VCMP_U32(14, v16, 0x0017db9a, 0xffe1a69b, 0x00631f5c, 0x004d213a, 0x0091bf5c, + 0xfffade07, 0xffdc1c11, 0x00435302, 0x006c2433, 0x00052ba2, + 0x002a332b, 0x00298f2f, 0xffce6091, 0x0010bcd3, 0x0053cd8d, + 0xff9121a8); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as TEST_CASE7 +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 9352.418, -5719.459, 4617.815, -3012.009, -3597.063, + // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, + // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, + // 4727.792 + VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, + 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, + 
0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, + 0x4593be56); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rtz.x.f.w v8, v4, v0.t"); + // 0, -5719, 0, -3012, 0, + // -5717, 0, 1286, 0, 3842, 0, + // -7283, 0, -7958, 0, 4727 + VCMP_U16(15, v8, 0x0000, 0xe9a9, 0x0000, 0xf43c, 0x0000, 0xe9ab, 0x0000, + 0x0506, 0x0000, 0x0f02, 0x0000, 0xe38d, 0x0000, 0xe0ea, 0x0000, + 0x1277); + + VSET(16, e32, m4); + // 1563546.261, -1988965.594, 6496092.888, 5054778.769, + // 9551708.952, -336377.787, -2352111.643, 4412162.570, + // 7087155.475, 338850.875, 2765611.498, 2723631.912, + // -3252079.308, 1096915.326, 5492109.280, -7265880.245 + VLOAD_64(v8, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, + 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, + 0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, + 0xc148cfb7a76dea0f, 0x4130bcd353667e5d, 0x4154f36351f3a3c5, + 0xc15bb7960fb007a5); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rtz.x.f.w v16, v8, v0.t"); + // 0, -1988965, 0, 5054778, 0, + // -336377, 0, 4412162, 0, + // 338850, 0, 2723631, 0, + // 1096915, 0, -7265880 + VCMP_U32(16, v16, 0x00000000, 0xffe1a69b, 0x00000000, 0x004d213a, 0x00000000, + 0xfffade07, 0x00000000, 0x00435302, 0x00000000, 0x00052ba2, + 0x00000000, 0x00298f2f, 0x00000000, 0x0010bcd3, 0x00000000, + 0xff9121a8); +}; + +///////////////// +// vfncvt.f.xu // +///////////////// + +// Simple random test with similar values +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 4294964178, 5853, 4294962638, 4294962082, 4585, + // 1637, 3984, 4294964217, 9553, 4294962615, + // 4294962166, 9867, 4294958580, 4294966752, 5172, + // 7478 + VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, + 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, + 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, + 0x00001d36); + 
asm volatile("vfncvt.f.xu.w v8, v4"); + // inf, 5852.000, inf, inf, 4584.000, 1637.000, + // 3984.000, inf, 9552.000, inf, inf, 9864.000, + // inf, inf, 5172.000, 7480.000 + VCMP_U16(17, v8, 0x7c00, 0x6db7, 0x7c00, 0x7c00, 0x6c7a, 0x6665, 0x6bc8, + 0x7c00, 0x70aa, 0x7c00, 0x7c00, 0x70d1, 0x7c00, 0x7c00, 0x6d0d, + 0x6f4e); + + VSET(16, e32, m4); + // 18446744073704835106, 18446744073709117625, + // 18446744073705901616, 2086515, 18446744073699655996, + // 932771, 255753, 3148047, + // 18446744073705977615, 18446744073704792883, + // 18446744073704699584, 8685460, 18446744073709143843, + // 18446744073703142874, 3905530, 18446744073704152149 + VLOAD_64(v8, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, + 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, + 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, + 0xfffffffffff9c723, 0xffffffffff9e35da, 0x00000000003b97fa, + 0xffffffffffad9c55); + asm volatile("vfncvt.f.xu.w v16, v8"); + // 18446744073709551616.000, 18446744073709551616.000, + // 18446744073709551616.000, 2086515.000, + // 18446744073709551616.000, 932771.000, 255753.000, + // 3148047.000, 18446744073709551616.000, + // 18446744073709551616.000, 18446744073709551616.000, + // 8685460.000, 18446744073709551616.000, + // 18446744073709551616.000, 3905530.000, + // 18446744073709551616.000 + VCMP_U32(18, v16, 0x5f800000, 0x5f800000, 0x5f800000, 0x49feb398, 0x5f800000, + 0x4963ba30, 0x4879c240, 0x4a40243c, 0x5f800000, 0x5f800000, + 0x5f800000, 0x4b048794, 0x5f800000, 0x5f800000, 0x4a6e5fe8, + 0x5f800000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as TEST_CASE9 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 4294964178, 5853, 4294962638, 4294962082, 4585, + // 1637, 3984, 4294964217, 9553, 4294962615, + // 4294962166, 9867, 4294958580, 4294966752, 5172, + // 7478 + VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 
0xffffeba2, 0x000011e9, + 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, + 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, + 0x00001d36); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.xu.w v8, v4, v0.t"); + // 0.000, 5852.000, 0.000, inf, 0.000, 1637.000, + // 0.000, inf, 0.000, inf, 0.000, 9864.000, 0.000, + // inf, 0.000, 7480.000 + VCMP_U16(19, v8, 0x0, 0x6db7, 0x0, 0x7c00, 0x0, 0x6665, 0x0, 0x7c00, 0x0, + 0x7c00, 0x0, 0x70d1, 0x0, 0x7c00, 0x0, 0x6f4e); + + VSET(16, e32, m4); + // 18446744073704835106, 18446744073709117625, + // 18446744073705901616, 2086515, 18446744073699655996, + // 932771, 255753, 3148047, + // 18446744073705977615, 18446744073704792883, + // 18446744073704699584, 8685460, 18446744073709143843, + // 18446744073703142874, 3905530, 18446744073704152149 + VLOAD_64(v8, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, + 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, + 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, + 0xfffffffffff9c723, 0xffffffffff9e35da, 0x00000000003b97fa, + 0xffffffffffad9c55); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.xu.w v16, v8, v0.t"); + // 0.000, 18446744073709551616.000, 0.000, 2086515.000, + // 0.000, 932771.000, 0.000, 3148047.000, 0.000, + // 18446744073709551616.000, 0.000, 8685460.000, 0.000, + // 18446744073709551616.000, 0.000, 18446744073709551616.000 + VCMP_U32(20, v16, 0x0, 0x5f800000, 0x0, 0x49feb398, 0x0, 0x4963ba30, 0x0, + 0x4a40243c, 0x0, 0x5f800000, 0x0, 0x4b048794, 0x0, 0x5f800000, 0x0, + 0x5f800000); +}; + +//////////////// +// vfncvt.f.x // +//////////////// + +// Simple random test with similar values +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -6279, 3717, 9022, -8925, -5530, + // 3851, 5592, -3692, -2747, -748, + // -2621, -9352, 4018, 3174, -6975, + // -4466 + VLOAD_32(v4, 0xffffe779, 
0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + 0x00000f0b, 0x000015d8, 0xfffff194, 0xfffff545, 0xfffffd14, + 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, + 0xffffee8e); + asm volatile("vfncvt.f.x.w v8, v4"); + // -6280.000, 3716.000, 9024.000, -8928.000, -5528.000, + // 3852.000, 5592.000, -3692.000, -2748.000, -748.000, + // -2620.000, -9352.000, 4018.000, 3174.000, -6976.000, + // -4464.000 + VCMP_U16(21, v8, 0xee22, 0x6b42, 0x7068, 0xf05c, 0xed66, 0x6b86, 0x6d76, + 0xeb36, 0xe95e, 0xe1d8, 0xe91e, 0xf091, 0x6bd9, 0x6a33, 0xeed0, + 0xec5c); + + VSET(16, e32, m4); + // 757099, -9365555, 3016973, + // -9277105, -8350486, -650348, + // -1775160, 4659116, 148573, + // 4475248, -2937762, 3310433, + // 9151745, -2201488, -1506850, + // 1593161 + VLOAD_64(v8, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, + 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, + 0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, + 0x00000000008ba501, 0xffffffffffde6870, 0xffffffffffe901de, + 0x0000000000184f49); + asm volatile("vfncvt.f.x.w v16, v8"); + // 757099.000, -9365555.000, 3016973.000, -9277105.000, + // -8350486.000, -650348.000, -1775160.000, 4659116.000, + // 148573.000, 4475248.000, -2937762.000, 3310433.000, + // 9151745.000, -2201488.000, -1506850.000, 1593161.000 + VCMP_U32(22, v16, 0x4938d6b0, 0xcb0ee833, 0x4a382434, 0xcb0d8eb1, 0xcafed62c, + 0xc91ec6c0, 0xc9d8b1c0, 0x4a8e2f58, 0x48111740, 0x4a8892e0, + 0xca334e88, 0x4a4a0d84, 0x4b0ba501, 0xca065e40, 0xc9b7f110, + 0x49c27a48); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE11 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -6279, 3717, 9022, -8925, -5530, + // 3851, 5592, -3692, -2747, -748, + // -2621, -9352, 4018, 3174, -6975, + // -4466 + VLOAD_32(v4, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + 0x00000f0b, 0x000015d8, 0xfffff194,
0xfffff545, 0xfffffd14, + 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, + 0xffffee8e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.x.w v8, v4, v0.t"); + // 0.000, 3716.000, 0.000, -8928.000, 0.000, 3852.000, + // 0.000, -3692.000, 0.000, -748.000, 0.000, -9352.000, + // 0.000, 3174.000, 0.000, -4464.000 + VCMP_U16(23, v8, 0x0, 0x6b42, 0x0, 0xf05c, 0x0, 0x6b86, 0x0, 0xeb36, 0x0, + 0xe1d8, 0x0, 0xf091, 0x0, 0x6a33, 0x0, 0xec5c); + + VSET(16, e32, m4); + // 757099, -9365555, 3016973, -9277105, + // -8350486, -650348, -1775160, 4659116, + // 148573, 4475248, -2937762, 3310433, + // 9151745, -2201488, -1506850, 1593161 + VLOAD_64(v8, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, + 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, + 0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, + 0x00000000008ba501, 0xffffffffffde6870, 0xffffffffffe901de, + 0x0000000000184f49); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.x.w v16, v8, v0.t"); + // 0.000, -9365555.000, 0.000, -9277105.000, 0.000, + // -650348.000, 0.000, 4659116.000, 0.000, 4475248.000, + // 0.000, 3310433.000, 0.000, -2201488.000, 0.000, + // 1593161.000 + VCMP_U32(24, v16, 0x0, 0xcb0ee833, 0x0, 0xcb0d8eb1, 0x0, 0xc91ec6c0, 0x0, + 0x4a8e2f58, 0x0, 0x4a8892e0, 0x0, 0x4a4a0d84, 0x0, 0xca065e40, 0x0, + 0x49c27a48); +}; + +//////////////// +// vfncvt.f.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE13(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, + 
0x45b1797f); + asm volatile("vfncvt.f.f.w v8, v4"); + // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, 9600.000, + // 2474.000, 3094.000, 7684.000, -6852.000, -54.938, 7736.000, + // 4172.000, 5268.000, 9160.000, 5680.000 + VCMP_U16(25, v8, 0x631a, 0xeea1, 0xeda7, 0x6fde, 0x6bb6, 0x70b0, 0x68d5, + 0x6a0b, 0x6f81, 0xeeb1, 0xd2de, 0x6f8e, 0x6c13, 0x6d25, 0x7079, + 0x6d8c); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + asm volatile("vfncvt.f.f.w v16, v8"); + // 153431.766, -7796011.000, -6652812.000, 1049714.750, + // 7538298.500, -8731739.000, 537176.625, -3884944.250, + // 7612336.000, -2270131.500, -4976406.500, -5260237.000, + // -4947738.000, 3583352.250, 7648790.500, -9360989.000 + VCMP_U32(26, v16, 0x4815d5f1, 0xcaedea56, 0xcacb0718, 0x49802396, 0x4ae60cf5, + 0xcb053c5b, 0x4903258a, 0xca6d1e41, 0x4ae84f60, 0xca0a8ece, + 0xca97de2d, 0xcaa0879a, 0xca96fe34, 0x4a5ab5e1, 0x4ae96c2d, + 0xcb0ed65d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE13 +void TEST_CASE14(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, +
0x45b1797f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.f.w v8, v4, v0.t"); + // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, + // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 5268.000, + // 0.000, 5680.000 + VCMP_U16(27, v8, 0x0, 0xeea1, 0x0, 0x6fde, 0x0, 0x70b0, 0x0, 0x6a0b, 0x0, + 0xeeb1, 0x0, 0x6f8e, 0x0, 0x6d25, 0x0, 0x6d8c); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.f.w v16, v8, v0.t"); + // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, + // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, + // 0.000, 3583352.250, 0.000, -9360989.000 + VCMP_U32(28, v16, 0x0, 0xcaedea56, 0x0, 0x49802396, 0x0, 0xcb053c5b, 0x0, + 0xca6d1e41, 0x0, 0xca0a8ece, 0x0, 0xcaa0879a, 0x0, 0x4a5ab5e1, 0x0, + 0xcb0ed65d); +}; + +//////////////////// +// vfncvt.rod.f.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE15(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, + // 9596.856, 2474.506, 3094.286, 7684.992, -6850.149, + // -54.922, 7737.443, 4171.873, 5266.611, 9163.839, + // 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, + 0x45b1797f); + 
asm volatile("vfncvt.rod.f.f.w v8, v4"); + // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, + // 9600.000, 2474.000, 3094.000, 7684.000, -6852.000, + // -54.938, 7736.000, 4172.000, 5268.000, 9160.000, 5680.000 + VCMP_U16(29, v8, 0x6319, 0xeea1, 0xeda7, 0x6fdd, 0x6bb5, 0x70af, 0x68d5, + 0x6a0b, 0x6f81, 0xeeb1, 0xd2dd, 0x6f8f, 0x6c13, 0x6d25, 0x7079, + 0x6d8b); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, + // 1049714.758, 7538298.328, -8731739.480, 537176.622, + // -3884944.157, 7612336.042, -2270131.404, + // -4976406.726, -5260237.163, -4947737.810, + // 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + asm volatile("vfncvt.rod.f.f.w v16, v8"); + // 153431.766, -7796010.500, -6652812.500, + // 1049714.875, 7538298.500, -8731739.000, + // 537176.5625, -3884944.250, 7612336.500, + // -2270131.250, -4976406.500, -5260237.500, + // -4947737.500, 3583352.250, 7648790.500, + // -9360989.000 + VCMP_U32(30, v16, 0x4815d5f1, 0xcaedea55, 0xcacb0719, 0x49802397, 0x4ae60cf5, + 0xcb053c5b, 0x49032589, 0xca6d1e41, 0x4ae84f61, 0xca0a8ecd, + 0xca97de2d, 0xcaa0879b, 0xca96fe33, 0x4a5ab5e1, 0x4ae96c2d, + 0xcb0ed65d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE15 +void TEST_CASE16(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4,
0x460f2f5b, + 0x45b1797f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rod.f.f.w v8, v4, v0.t"); + // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, + // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 5268.000, + // 0.000, 5680.000 + VCMP_U16(31, v8, 0x0, 0xeea1, 0x0, 0x6fdd, 0x0, 0x70af, 0x0, 0x6a0b, 0x0, + 0xeeb1, 0x0, 0x6f8f, 0x0, 0x6d25, 0x0, 0x6d8b); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rod.f.f.w v16, v8, v0.t"); + // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, + // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, + // 0.000, 3583352.250, 0.000, -9360989.000 + VCMP_U32(32, v16, 0x0, 0xcaedea55, 0x0, 0x49802397, 0x0, 0xcb053c5b, 0x0, + 0xca6d1e41, 0x0, 0xca0a8ecd, 0x0, 0xcaa0879b, 0x0, 0x4a5ab5e1, 0x0, + 0xcb0ed65d); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + TEST_CASE13(); + TEST_CASE14(); + + /* + vfncvt.rod.f.f is not supported yet + + // TEST_CASE15(); + // TEST_CASE16(); + */ + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c new file 
mode 100644 index 000000000..3d48cc3e8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c @@ -0,0 +1,456 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, + // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, + // 0.7739, 0.1461 + VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, + 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); + // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, + // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, + // -0.3015, 0.9556 + VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + 0x34c1, 0xba73, 0x3bbc, 0xb839, 0x365b, 0xaef0, 0xb4d3, 0x3ba5); + // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, + // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, + // -0.4551, 0.6694, -0.8550 + VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); + asm volatile("vfnmacc.vv v2, v4, v6"); + // -0.1313, -0.2517, -0.6758, 0.0863, -0.8223, -0.7246, -0.7271, + // 0.2788, -0.4846, -0.7271, -1.7559, -0.4583, -0.3916, 0.4795, + // -0.4360, 0.7153 + VCMP_U16(1, v2, 0xb033, 0xb407, 0xb968, 0x2d86, 0xba94, 0xb9cc, 0xb9d1, + 0x3476, 0xb7c1, 0xb9d1, 0xbf06, 0xb755, 0xb644, 0x37ac, 0xb6fa, + 0x39b9); + + VSET(16, e32, m4); + // -0.17374928, -0.36242354, -0.18093164, 0.94970566, + // -0.45790458, -0.17780401, -0.51985794, -0.04832974, + // 0.13252106, 0.77533042, 0.42536697, -0.72199643, + // -0.25088808, 0.28798762, 0.66300607, -0.63549894 + VLOAD_32(v8, 
0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, + 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, + 0xbf22b00f); + // -0.61242568, 0.71439523, -0.15632962, 0.10917858, + // 0.19637996, -0.88467985, 0.73412597, -0.98048240, 0.25438991, + // -0.02058743, -0.00876777, 0.21936898, -0.71130067, + // -0.29675287, -0.96093589, 0.24695934 + VLOAD_32(v12, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, + 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, + 0x3e7ce2e9); + // 0.77600455, 0.02542816, -0.63618338, 0.11704731, + // 0.45613721, -0.90825689, 0.21235447, 0.35766414, + // 0.08650716, -0.98431164, 0.21029140, -0.92919809, + // 0.46440944, 0.70648551, -0.80876821, -0.19595607 + VLOAD_32(v4, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, + 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, + 0xbe48a8b5); + asm volatile("vfnmacc.vv v4, v8, v12"); + // -0.88241309, 0.23348548, 0.60789841, -0.22073483, + // -0.36621392, 0.75095725, 0.16928674, -0.40505061, + // -0.12021918, 1.00027370, -0.20656188, 1.08758175, + // -0.64286631, -0.62102437, 1.44587445, 0.35289848 + VCMP_U32(2, v4, 0xbf61e5d3, 0x3e6f16d2, 0x3f1b9f3b, 0xbe62084f, 0xbebb8064, + 0x3f403ebc, 0x3e2d5982, 0xbecf62cb, 0xbdf63579, 0x3f8008f8, + 0xbe5384f5, 0x3f8b35e1, 0xbf2492e3, 0xbf1efb74, 0x3fb9126b, + 0x3eb4af1c); + + VSET(16, e64, m8); + // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, + // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, + // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, + // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, + // -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, + // -0.3469340725892474 + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 
0xbfc46c6408490198, + 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, + 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, + 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, + 0xbfe3079aa59c3bf4, 0x3fdab978d4c06588, 0xbfe4568118eaaa68, + 0xbfd6342af7e8e3dc); + // 0.9024789401717532, 0.1750129013440402, 0.5031110880652467, + // -0.2303324647743561, -0.3880673069078899, + // -0.9441232974464955, -0.9718449040015202, 0.6713775626400460, + // -0.0912048565692380, -0.5347347522064834, + // -0.5209348837668262, 0.1676058792979986, + // -0.3611782231841894, 0.5839305722445856, + // -0.5690013462620132, -0.7273345685963009 + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, + 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, + 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, + 0xbfd71d8b44269f74, 0x3fe2af8f2add9a18, 0xbfe235424fb26902, + 0xbfe74653252be25a); + // -0.0769255470598902, -0.8447241112550155, -0.1913688167412757, + // 0.7663381230505260, 0.2058488268749510, -0.0251549939511286, + // 0.5275264461714482, -0.7602756587514194, 0.6498044022974587, + // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, + // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, + // -0.8578075287458693 + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, + 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, + 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, + 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, + 0xbfeb7328c6473c1e); + asm volatile("vfnmacc.vv v8, v16, v24"); + // 0.3704523636601942, 0.7614500740339213, 0.2716441267930978, + // -0.8829326116147059, -0.5356217329860959, + // -0.1581610880357251, 0.0218692556270895, 1.4217408543890222, + // -0.6884591815896014, 1.0257824393236514, 1.0474578451230310, + // 
-0.7372432681003268, -0.8084530581760619, + // -0.3970498940841519, 0.1479851912270807, 0.6054703847278113 + VCMP_U64(3, v8, 0x3fd7b57dd4a95f28, 0x3fe85dcc8bb06629, 0x3fd1629e0c2e846c, + 0xbfec40fbe46ea001, 0xbfe123d0304677d2, 0xbfc43e9f5e4e7ddd, + 0x3f9664e4e6d32991, 0x3ff6bf73568fcea3, 0xbfe607db8cb1dd4b, + 0x3ff0699ad8db4c8b, 0x3ff0c263284bdf71, 0xbfe7977f31b5fc6d, + 0xbfe9ded8f2a6f4b5, 0xbfd96943f57e3046, 0x3fc2f12dc24e6a42, + 0x3fe360036da34794); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, + // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, + // 0.7739, 0.1461 + VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, + 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); + // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, + // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, + // -0.3015, 0.9556 + VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + 0x34c1, 0xba73, 0x3bbc, 0xb839, 0x365b, 0xaef0, 0xb4d3, 0x3ba5); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, + // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, + // -0.4551, 0.6694, -0.8550 + VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); + asm volatile("vfnmacc.vv v2, v4, v6, v0.t"); + // 0.7402, -0.2517, 0.1455, 0.0863, 0.3347, -0.7246, 0.6543, + // 0.2788, 0.2476, -0.7271, 0.9980, -0.4583, 0.2239, 0.4795, + // 0.6694, 0.7153 (inactive even lanes keep the old v2 value) + VCMP_U16(4, v2, 0x39ec, 0xb407, 0x30a8, 0x2d86, 0x355b, 0xb9cc, 0x393c, + 0x3476, 0x33ec, 0xb9d1, 0x3bfc, 0xb755, 0x332a, 0x37ac, 0x395b, + 0x39b9); + + VSET(16, e32, m4); + // -0.17374928, -0.36242354, -0.18093164, 0.94970566, + // -0.45790458, -0.17780401, -0.51985794,
-0.04832974, + // 0.13252106, 0.77533042, 0.42536697, -0.72199643, + // -0.25088808, 0.28798762, 0.66300607, -0.63549894 + VLOAD_32(v8, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, + 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, + 0xbf22b00f); + // -0.61242568, 0.71439523, -0.15632962, 0.10917858, + // 0.19637996, -0.88467985, 0.73412597, -0.98048240, 0.25438991, + // -0.02058743, -0.00876777, 0.21936898, -0.71130067, + // -0.29675287, -0.96093589, 0.24695934 + VLOAD_32(v12, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, + 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, + 0x3e7ce2e9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.77600455, 0.02542816, -0.63618338, 0.11704731, + // 0.45613721, -0.90825689, 0.21235447, 0.35766414, + // 0.08650716, -0.98431164, 0.21029140, -0.92919809, + // 0.46440944, 0.70648551, -0.80876821, -0.19595607 + VLOAD_32(v4, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, + 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, + 0xbe48a8b5); + asm volatile("vfnmacc.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.23348548, 0.00000000, -0.22073483, + // 0.00000000, 0.75095725, 0.00000000, -0.40505061, + // 0.00000000, 1.00027370, 0.00000000, 1.08758175, + // 0.00000000, -0.62102437, 0.00000000, 0.35289848 + VCMP_U32(5, v4, 0x3f46a83c, 0x3e6f16d2, 0xbf22dcea, 0xbe62084f, 0x3ee98ad1, + 0x3f403ebc, 0x3e597373, 0xbecf62cb, 0x3db12aaa, 0x3f8008f8, + 0x3e5756a1, 0x3f8b35e1, 0x3eedc713, 0xbf1efb74, 0xbf4f0b6f, + 0x3eb4af1c); + + VSET(16, e64, m8); + // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, + // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, + // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, + // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, + 
// -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, + // -0.3469340725892474 + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 0xbfc46c6408490198, + 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, + 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, + 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, + 0xbfe3079aa59c3bf4, 0x3fdab978d4c06588, 0xbfe4568118eaaa68, + 0xbfd6342af7e8e3dc); + // 0.9024789401717532, 0.1750129013440402, 0.5031110880652467, + // -0.2303324647743561, -0.3880673069078899, + // -0.9441232974464955, -0.9718449040015202, 0.6713775626400460, + // -0.0912048565692380, -0.5347347522064834, + // -0.5209348837668262, 0.1676058792979986, + // -0.3611782231841894, 0.5839305722445856, + // -0.5690013462620132, -0.7273345685963009 + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, + 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, + 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, + 0xbfd71d8b44269f74, 0x3fe2af8f2add9a18, 0xbfe235424fb26902, + 0xbfe74653252be25a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.0769255470598902, -0.8447241112550155, -0.1913688167412757, + // 0.7663381230505260, 0.2058488268749510, -0.0251549939511286, + // 0.5275264461714482, -0.7602756587514194, 0.6498044022974587, + // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, + // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, + // -0.8578075287458693 + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, + 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, + 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, + 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, + 0xbfeb7328c6473c1e); + asm volatile("vfnmacc.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.7614500740339213, 0.0000000000000000, + // 
-0.8829326116147059, 0.0000000000000000, + // -0.1581610880357251, 0.0000000000000000, 1.4217408543890222, + // 0.0000000000000000, 1.0257824393236514, 0.0000000000000000, + // -0.7372432681003268, 0.0000000000000000, + // -0.3970498940841519, 0.0000000000000000, 0.6054703847278113 + VCMP_U64(6, v8, 0xbfb3b16484d96110, 0x3fe85dcc8bb06629, 0xbfc87ec5fcb06230, + 0xbfec40fbe46ea001, 0x3fca59411dac8758, 0xbfc43e9f5e4e7ddd, + 0x3fe0e17f24429b70, 0x3ff6bf73568fcea3, 0x3fe4cb329a1542de, + 0x3ff0699ad8db4c8b, 0xbfead5c4a4b40f1a, 0xbfe7977f31b5fc6d, + 0x3fe2ff5484485472, 0xbfd96943f57e3046, 0xbfe04ecd797a151a, + 0x3fe360036da34794); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1300 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3029); + // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, -0.7183, + // -0.1191, 0.7998, 0.1169, 0.1169, -0.9214, -0.4360, -0.6250, + // -0.5386, 0.6543 + VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + 0x3a66, 0x2f7c, 0x2f7c, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); + // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, + // 0.5586, 0.2352, 0.6294, 0.6294, -0.8877, -0.2426, 0.5488, + // 0.4001, 0.1772 + VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + 0x3387, 0x3909, 0x3909, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); + asm volatile("vfnmacc.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.8896, 0.2808, 0.3464, 0.8608, 0.2544, 0.7627, 0.0377, + // -0.5430, -0.3394, -0.6445, -0.6445, 1.0078, 0.2993, -0.4675, + // -0.3301, -0.2622 + VCMP_U16(7, v2, 0xbb1e, 0x347e, 0x358b, 0x3ae3, 0x3412, 0x3a1a, 0x28d3, + 0xb858, 0xb56d, 0xb928, 0xb928, 0x3c08, 0x34ca, 0xb77b, 0xb548, + 0xb432); + + VSET(16, e32, m4); + double dscalar_32; + // -0.26917368 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe89d122); + // -0.27745819, -0.86308837, -0.16746511, -0.68674469, + // -0.49064314, -0.74352056, -0.17169137, 0.26071417, 
+ // 0.71857828, 0.07920383, -0.43244356, -0.58339220, 0.80679923, + // 0.23900302, 0.73513943, -0.80685192 + VLOAD_32(v8, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, + 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, + 0xbf4e8dd9); + // 0.13509545, -0.29169917, 0.80494332, -0.63637137, + // 0.63772237, -0.87242430, -0.44194883, -0.41286576, + // -0.57735479, 0.61664599, 0.94073379, -0.89744234, + // -0.70681161, 0.23247144, 0.06774496, -0.38581881 + VLOAD_32(v4, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, + 0x3f70d3ee, 0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, + 0xbec58a0b); + asm volatile("vfnmacc.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.20977989, 0.05937849, -0.85002053, 0.45151776, + // -0.76979059, 0.67228812, 0.39573404, 0.48304313, + // 0.77077717, -0.59532642, -1.05713618, 0.74040854, 0.92398071, + // -0.16813812, 0.13013524, 0.16863550 + VCMP_U32(8, v4, 0xbe56d08a, 0x3d7336de, 0xbf599af2, 0x3ee72d57, 0xbf4510ff, + 0x3f2c1b13, 0x3eca9da7, 0x3ef7516f, 0x3f4551a7, 0xbf186750, + 0xbf87503d, 0x3f3d8b6a, 0x3f6c8a00, 0xbe2c2c66, 0x3e05422c, + 0x3e2caec9); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1021836258281641 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); + // 0.3274079371230154, 0.9254873656544997, 0.9683609308176633, + // -0.6778040243955326, 0.9669615854915627, + // 0.8026267269428324, -0.7388821308618641, 0.7432413708598076, + // 0.9355143976513562, -0.4219868517017851, 0.8950700270161456, + // 0.6727820676214205, -0.8833440526985297, 0.0357808590148252, + // -0.3802125831332157, 0.9831607630398518 + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, + 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, + 0xbfdb01d523d9acc0, 0x3feca469e5b540fa, 0x3fe5876e42389dca, + 
0xbfec445abf2e99e4, 0x3fa251de66953a60, 0xbfd8556728856e10, + 0x3fef760d8f7eee22); + // 0.1671854121593166, 0.6264287337062140, 0.1587305627009998, + // -0.3348358495277817, 0.4721131630506652, + // 0.2878076790245236, 0.5083797506594245, 0.9444607965181537, + // -0.2805814092841707, -0.7218856627753110, + // -0.3443302881655670, 0.3680926220616383, + // -0.2344410843781140, 0.3553553454507421, + // 0.0951222110617760, -0.8329780449088213 + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, + 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, + 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, + 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, + 0xbfeaa7c192a56bc8); + asm volatile("vfnmacc.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.2006411422994659, -0.7209983883869466, -0.2576811937222847, + // 0.4040963223414386, -0.5709208038927434, -0.3698229881701340, + // -0.4328780954683192, -1.0204078946581041, 0.1849871561177042, + // 0.7650058093340112, 0.2528687874349445, -0.4368399331233641, + // 0.3247043825365946, -0.3590115633601233, -0.0562707107317317, + // 0.7325151133694248 + VCMP_U64(9, v8, 0xbfc9ae9be43442c9, 0xbfe7126b3652e68a, 0xbfd07dd942f53687, + 0x3fd9dcb6d238fb8a, 0xbfe244fbb4aa695e, 0xbfd7ab2e09dffb6d, + 0xbfdbb44653cc3c92, 0xbff053973a823036, 0x3fc7ada8bcda50a5, + 0x3fe87aed768addeb, 0x3fd02f00910d95b4, 0xbfdbf52f7a9681dc, + 0x3fd4c7f4e3f733f9, 0xbfd6fa0ba2e11fba, 0xbfaccf83bca18481, + 0x3fe770c388f7eacc); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1300 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3029); + // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, + // -0.7183, -0.1191, 0.7998, 0.1169, 0.2551, -0.9214, + // -0.4360, -0.6250, -0.5386, 0.6543 + VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + 0x3a66, 
0x2f7c, 0x3415, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, + // 0.5586, 0.2352, 0.6294, -0.0325, -0.8877, -0.2426, 0.5488, + // 0.4001, 0.1772 + VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + 0x3387, 0x3909, 0xa828, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); + asm volatile("vfnmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2808, 0.0000, 0.8608, 0.0000, 0.7627, 0.0000, + // -0.5430, 0.0000, -0.6445, 0.0000, 1.0078, 0.0000, + // -0.4675, 0.0000, -0.2622 + VCMP_U16(10, v2, 0x3b6a, 0x347e, 0xb529, 0x3ae3, 0xb1f1, 0x3a1a, 0x2b21, + 0xb858, 0x3387, 0xb928, 0xa828, 0x3c08, 0xb3c3, 0xb77b, 0x3667, + 0xb432); + + VSET(16, e32, m4); + double dscalar_32; + // -0.26917368 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe89d122); + // -0.27745819, -0.86308837, -0.16746511, -0.68674469, + // -0.49064314, -0.74352056, -0.17169137, 0.26071417, + // 0.71857828, 0.07920383, -0.43244356, -0.58339220, + // 0.80679923, 0.23900302, 0.73513943, -0.80685192 + VLOAD_32(v8, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, + 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, + 0xbf4e8dd9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.13509545, -0.29169917, 0.80494332, -0.63637137, + // 0.63772237, -0.87242430, -0.44194883, -0.41286576, + // -0.57735479, 0.61664599, 0.94073379, -0.89744234, + // -0.70681161, 0.23247144, 0.06774496, -0.38581881 + VLOAD_32(v4, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, + 0x3f70d3ee, 0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, + 0xbec58a0b); + asm volatile("vfnmacc.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.05937849, 0.00000000, 0.45151776, + // 0.00000000, 0.67228812, 0.00000000, 0.48304313, + // 0.00000000, -0.59532642, 0.00000000, 0.74040854, + // 
0.00000000, -0.16813812, 0.00000000, 0.16863550 + VCMP_U32(11, v4, 0x3e0a5676, 0x3d7336de, 0x3f4e10c4, 0x3ee72d57, 0x3f2341c6, + 0x3f2c1b13, 0xbee2471e, 0x3ef7516f, 0xbf13cd86, 0xbf186750, + 0x3f70d3ee, 0x3f3d8b6a, 0xbf34f19b, 0xbe2c2c66, 0x3d8abdde, + 0x3e2caec9); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1021836258281641 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); + // 0.3274079371230154, 0.9254873656544997, + // 0.9683609308176633, -0.6778040243955326, + // 0.9669615854915627, 0.8026267269428324, + // -0.7388821308618641, 0.7432413708598076, + // 0.9355143976513562, -0.4219868517017851, + // 0.8950700270161456, 0.6727820676214205, + // -0.8833440526985297, 0.0357808590148252, + // -0.3802125831332157, 0.9831607630398518 + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, + 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, + 0xbfdb01d523d9acc0, 0x3feca469e5b540fa, 0x3fe5876e42389dca, + 0xbfec445abf2e99e4, 0x3fa251de66953a60, 0xbfd8556728856e10, + 0x3fef760d8f7eee22); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.1671854121593166, 0.6264287337062140, + // 0.1587305627009998, -0.3348358495277817, + // 0.4721131630506652, 0.2878076790245236, + // 0.5083797506594245, 0.9444607965181537, + // -0.2805814092841707, -0.7218856627753110, + // -0.3443302881655670, 0.3680926220616383, + // -0.2344410843781140, 0.3553553454507421, + // 0.0951222110617760, -0.8329780449088213 + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, + 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, + 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, + 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, + 0xbfeaa7c192a56bc8); + asm volatile("vfnmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.2006411422994659, -0.7209983883869466, + // -0.2576811937222847, 
0.4040963223414386, + // -0.5709208038927434, -0.3698229881701340, + // -0.4328780954683192, -1.0204078946581041, + // 0.1849871561177042, 0.7650058093340112, 0.2528687874349445, + // -0.4368399331233641, 0.3247043825365946, + // -0.3590115633601233, -0.0562707107317317, 0.7325151133694248 + VCMP_U64(12, v8, 0x3fc56654e2cbd888, 0xbfe7126b3652e68a, 0x3fc4514877d696a0, + 0x3fd9dcb6d238fb8a, 0x3fde371a20d41408, 0xbfd7ab2e09dffb6d, + 0x3fe044a59c60fcd4, 0xbff053973a823036, 0xbfd1f50bba2f6e40, + 0x3fe87aed768addeb, 0xbfd60981e7ac601c, 0xbfdbf52f7a9681dc, + 0xbfce022a5b1f1348, 0xbfd6fa0ba2e11fba, 0x3fb859ede1a22f80, + 0x3fe770c388f7eacc); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c new file mode 100644 index 000000000..407bafc88 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c @@ -0,0 +1,458 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, + // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, + // 0.3540, -0.0847 + VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 0xad6c); + // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, + // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, + // 0.2812, -0.3159 + VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); + // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, + // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, + // -0.8145, 0.1893 + VLOAD_16(v2, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); + asm volatile("vfnmadd.vv v2, v4, v6"); + // -0.1322, 0.7598, -0.1383, 1.3506, 0.0894, -0.7734, 0.2512, + // 0.4155, 0.1132, 0.2517, -0.3176, 0.2864, -0.7407, 0.3254, + // 0.0071, 0.3320 + VCMP_U16(1, v2, 0xb03b, 0x3a14, 0xb06d, 0x3d67, 0x2db8, 0xba30, 0x3405, + 0x36a6, 0x2f3e, 0x3407, 0xb515, 0x3495, 0xb9ed, 0x3535, 0x1f3d, + 0x3550); + + VSET(16, e32, m4); + // -0.36820358, 0.10496315, -0.32905263, -0.92334682, + // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, + // 0.33028743, -0.43412161, 0.95273590, 0.06816643, + // -0.88978988, 0.18573478, 0.61926919 + VLOAD_32(v8, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, + 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 0x3e3e3142, + 0x3f1e886d); + // 0.69083834, -0.31329882, -0.54809541, 0.25019145, + // -0.67489260, 
0.23259214, -0.14038530, 0.09741956, + // -0.23567833, 0.75417399, -0.90357685, -0.87489468, + // 0.54726779, -0.06705534, -0.15476358, -0.96940458 + VLOAD_32(v12, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, + 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, + 0xbf782ae6); + // 0.03722767, 0.80796093, 0.53925264, -0.50804031, + // 0.63562357, -0.45508829, -0.22051410, 0.42499006, + // -0.59229839, -0.50074077, -0.80474108, -0.20762257, + // 0.15367362, 0.98349953, -0.15871963, -0.07445616 + VLOAD_32(v4, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, + 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, + 0xbd987c79); + asm volatile("vfnmadd.vv v4, v8, v12"); + // -0.67713100, 0.22849269, 0.72553790, -0.71928883, + // 0.40059602, 0.18944177, 0.00895807, -0.41666192, 0.73393923, + // -0.58878565, 0.55422139, 1.07270420, -0.55774319, + // 0.94216329, 0.18424334, 1.01551294 + VCMP_U32(2, v4, 0xbf2d5875, 0x3e69f9fd, 0x3f39bcda, 0xbf382350, 0x3ecd1aec, + 0x3e41fd06, 0x3c12c4e5, 0xbed554b6, 0x3f3be371, 0xbf16baa7, + 0x3f0de173, 0x3f894e5f, 0xbf0ec842, 0x3f71319d, 0x3e3caa49, + 0x3f81fc54); + + VSET(16, e64, m8); + // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, + // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, + // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, + // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, + // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, + // 0.8437352562659637 + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, + 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 0x3fc0dc0ff121d700, + 0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, + 0x3fdeb046bbd8fb80, 0xbfecd1d5ef173e7a, 0x3fef7e5e7fc2c286, + 0x3feaffe114849fb0); 
+ // 0.3915682245289982, 0.0468282563045201, 0.4640582663413180, + // 0.9199907734666593, -0.6702920875531786, 0.6250479001245852, + // -0.3716310293668668, 0.2191474803863191, + // -0.3398132406457823, -0.1436002174993440, + // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, + // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, + 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, + 0xbfc2617deeedd880, 0xbfe68e9e0cb3831e, 0x3fb298dd69733960, + 0x3fd38c8d6743b96c, 0xbfefb3a277d7b020, 0x3fd1059d6c5f9294, + 0xbfe38e44e6d0cbb0); + // 0.8932002267748917, 0.5237198185024288, -0.3716642114238491, + // 0.8806741908360942, 0.4285584084885536, -0.9185899240339090, + // -0.3906189235600976, -0.8681987020972610, + // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, + // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, + // 0.7351775340550828, -0.4044996673659886 + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, + 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, + 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, + 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, + 0xbfd9e35292a51b70); + asm volatile("vfnmadd.vv v8, v16, v24"); + // -0.2560345624688988, 0.0042928888070631, -0.3081421208222118, + // -0.1849655060915788, 0.6669400726566582, -1.0960014534443465, + // 0.0082438467988533, -0.9980814245844917, 0.4544533558235209, + // 0.0664094087027049, 0.1672039722542752, -0.0149294340464271, + // -0.7485835961663959, 1.5864550162939111, -0.9895117344007368, + // 0.9524074697338700 + VCMP_U64(3, v8, 0xbfd062deca1cc612, 0x3f71956b9081d880, 0xbfd3b899badce50e, + 0xbfc7acf31fc694ed, 0x3fe55792b50e7883, 0xbff18938d1ee9749, + 0x3f80e22663278b8b, 0xbfeff04874aabc45, 
0x3fdd15c38734723f, + 0x3fb10034fe865a4b, 0x3fc566f0944bf4a6, 0xbf8e9352b7d14aa9, + 0xbfe7f46595fb6a6d, 0x3ff9621ea7b8eb80, 0xbfefaa1483484d07, + 0x3fee7a1f3adf237b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, + // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, + // 0.3540, -0.0847 + VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 0xad6c); + // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, + // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, + // 0.2812, -0.3159 + VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, + // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, + // -0.8145, 0.1893 + VLOAD_16(v2, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); + asm volatile("vfnmadd.vv v2, v4, v6, v0.t"); + // 0.0337, 0.7598, -0.1886, 1.3506, 0.3225, -0.7734, 0.0698, + // 0.4155, -0.2539, 0.2517, -0.4319, 0.2864, 0.0674, 0.3254, + // -0.8145, 0.3320 + VCMP_U16(4, v2, 0x284f, 0x3a14, 0xb209, 0x3d67, 0x3529, 0xba30, 0x2c77, + 0x36a6, 0xb410, 0x3407, 0xb6e9, 0x3495, 0x2c50, 0x3535, 0xba84, + 0x3550); + + VSET(16, e32, m4); + // -0.36820358, 0.10496315, -0.32905263, -0.92334682, + // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, + // 0.33028743, -0.43412161, 0.95273590, 0.06816643, + // -0.88978988, 0.18573478, 0.61926919 + VLOAD_32(v8, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, + 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 
0x3e3e3142, + 0x3f1e886d); + // 0.69083834, -0.31329882, -0.54809541, 0.25019145, + // -0.67489260, 0.23259214, -0.14038530, 0.09741956, + // -0.23567833, 0.75417399, -0.90357685, -0.87489468, + // 0.54726779, -0.06705534, -0.15476358, -0.96940458 + VLOAD_32(v12, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, + 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, + 0xbf782ae6); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.03722767, 0.80796093, 0.53925264, -0.50804031, + // 0.63562357, -0.45508829, -0.22051410, 0.42499006, + // -0.59229839, -0.50074077, -0.80474108, -0.20762257, + // 0.15367362, 0.98349953, -0.15871963, -0.07445616 + VLOAD_32(v4, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, + 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, + 0xbd987c79); + asm volatile("vfnmadd.vv v4, v8, v12, v0.t"); + // 0.03722767, 0.22849269, 0.53925264, -0.71928883, + // 0.63562357, 0.18944177, -0.22051410, -0.41666192, + // -0.59229839, -0.58878565, -0.80474108, 1.07270420, + // 0.15367362, 0.94216329, -0.15871963, 1.01551294 + VCMP_U32(5, v4, 0x3d187c0a, 0x3e69f9fd, 0x3f0a0c76, 0xbf382350, 0x3f22b83a, + 0x3e41fd06, 0xbe61ce73, 0xbed554b6, 0xbf17a0de, 0xbf16baa7, + 0xbf4e0383, 0x3f894e5f, 0x3e1d5c9e, 0x3f71319d, 0xbe228766, + 0x3f81fc54); + + VSET(16, e64, m8); + // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, + // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, + // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, + // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, + // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, + // 0.8437352562659637 + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, + 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 0x3fc0dc0ff121d700, + 
0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, + 0x3fdeb046bbd8fb80, 0xbfecd1d5ef173e7a, 0x3fef7e5e7fc2c286, + 0x3feaffe114849fb0); + // 0.3915682245289982, 0.0468282563045201, 0.4640582663413180, + // 0.9199907734666593, -0.6702920875531786, 0.6250479001245852, + // -0.3716310293668668, 0.2191474803863191, + // -0.3398132406457823, -0.1436002174993440, + // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, + // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, + 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, + 0xbfc2617deeedd880, 0xbfe68e9e0cb3831e, 0x3fb298dd69733960, + 0x3fd38c8d6743b96c, 0xbfefb3a277d7b020, 0x3fd1059d6c5f9294, + 0xbfe38e44e6d0cbb0); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.8932002267748917, 0.5237198185024288, -0.3716642114238491, + // 0.8806741908360942, 0.4285584084885536, -0.9185899240339090, + // -0.3906189235600976, -0.8681987020972610, + // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, + // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, + // 0.7351775340550828, -0.4044996673659886 + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, + 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, + 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, + 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, + 0xbfd9e35292a51b70); + asm volatile("vfnmadd.vv v8, v16, v24, v0.t"); + // 0.8932002267748917, 0.0042928888070631, -0.3716642114238491, + // -0.1849655060915788, 0.4285584084885536, + // -1.0960014534443465, -0.3906189235600976, + // -0.9980814245844917, -0.8703598457154336, 0.0664094087027049, + // 0.7002825787534324, -0.0149294340464271, + // 0.9241326299982451, 1.5864550162939111, 0.7351775340550828, + // 0.9524074697338700 + 
VCMP_U64(6, v8, 0x3fec9518a458e4ea, 0x3f71956b9081d880, 0xbfd7c958b04a2e10, + 0xbfc7acf31fc694ed, 0x3fdb6d803f3895dc, 0xbff18938d1ee9749, + 0xbfd8ffe68378eb00, 0xbfeff04874aabc45, 0xbfebd9fce4232e3a, + 0x3fb10034fe865a4b, 0x3fe668b702b68b90, 0xbf8e9352b7d14aa9, + 0x3fed927e97e0492a, 0x3ff9621ea7b8eb80, 0x3fe786930930a7fe, + 0x3fee7a1f3adf237b); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.2646 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x343c); + // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, -0.1786, + // -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, 0.0567, + // -0.9897, -0.5400, -0.4187 + VLOAD_16(v4, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); + // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, -0.8374, -0.8623, + // -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, -0.6514, -0.5703, + // -0.2585, -0.3320 + VLOAD_16(v2, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); + asm volatile("vfnmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.2129, 0.3586, 0.2162, -0.8867, 0.7432, 0.5200, 0.4067, + // 0.2781, -0.2092, -0.2883, 0.4736, -0.1416, 0.1157, 1.1406, + // 0.6084, 0.5068 + VCMP_U16(7, v2, 0xb2d0, 0x35bd, 0x32eb, 0xbb17, 0x39f2, 0x3829, 0x3682, + 0x3473, 0xb2b3, 0xb49d, 0x3793, 0xb088, 0x2f68, 0x3c90, 0x38de, + 0x380d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.13809182 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe0d67f1); + // -0.16977388, -0.30800357, -0.37010264, -0.92290556, + // 0.55768263, 0.47349435, 0.77556002, 0.16363664, 0.80314618, + // -0.48171839, -0.60694915, 0.16937894, 0.86316317, + // 0.00897404, -0.96310323, -0.27890080 + VLOAD_32(v8, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 0x3f4d9afd, 0xbef6a3cb, + 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 
0x3c1307df, 0xbf768def, + 0xbe8ecc16); + // -0.33133313, -0.48972869, 0.95656961, -0.89211702, + // 0.72045243, -0.36672497, 0.69402671, 0.44954479, + // -0.77024877, -0.83221292, 0.37576449, -0.77536738, + // -0.55040795, -0.71568310, -0.75874990, 0.91956782 + VLOAD_32(v4, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, + 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, + 0x3f6b68cc); + asm volatile("vfnmadd.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 0.12401948, 0.24037606, 0.50219709, 0.79971153, + // -0.45819405, -0.52413607, -0.67972064, -0.10155818, + // -0.90951121, 0.36679661, 0.65883917, -0.27645081, + // -0.93917000, -0.10780402, 0.85832608, 0.40588558 + VCMP_U32(8, v4, 0x3dfdfded, 0x3e762524, 0x3f008ffd, 0x3f4cb9e5, 0xbeea9869, + 0xbf062dc8, 0xbf2e022c, 0xbdcffdbd, 0xbf68d5ba, 0x3ebbccc3, + 0x3f28a9af, 0xbe8d8af6, 0xbf706d72, 0xbddcc85b, 0x3f5bbb42, + 0x3ecfd03d); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8978909040536565 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); + // 0.4119623576675431, -0.1190899643735133, 0.9903323592718865, + // 0.5311038519754858, 0.1686986553141236, + // -0.8788301781199843, 0.1880579223718752, + // 0.7610824660598337, -0.0872931389118274, + // -0.6855627317033812, -0.0181686933036735, + // -0.9796673648941667, 0.2148282430178909, + // -0.1529278220414154, -0.7708574130314993, + // -0.4104905538508556 + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, + 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, + 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, + 0x3fcb7f7deb026e00, 0xbfc393238d287f60, 0xbfe8aadd2a5b2eba, + 0xbfda457a2c06ce78); + // 0.5050016609492949, 0.8257750946258060, + // -0.2631016891694440, 0.8041841986447893, + // -0.0322547653971421, -0.3994438840519345, + // -0.6154540433263920, -0.9209485498858390, 
+ // 0.3334000822950238, -0.6004917796663505, 0.4588428764280068, + // 0.8937156106780619, 0.8421999503441004, 0.3083609158934253, + // -0.2219824502919918, 0.5118870280625194 + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, + 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, + 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, + 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, + 0x3fe06160e798ce12); + asm volatile("vfnmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.8653987555659035, -0.6223659818850454, -0.7540957457254903, + // -1.2531735291223209, -0.1397373948516451, 1.2374872082900796, + // 0.3645526649939371, 0.0658288599840662, + // -0.2120637623916150, 1.2247388386247897, -0.3938221518308497, + // 0.1772082472555760, -0.9710319178262998, -0.1239466395049457, + // 0.9701734360082217, -0.0491281525495393 + VCMP_U64(9, v8, 0xbfebb158bb24f2ec, 0xbfe3ea6c104adab7, 0xbfe8218d66be32e5, + 0xbff40cffafbcb13e, 0xbfc1e2ea3a754147, 0x3ff3ccbf630d300f, + 0x3fd754d4b3746402, 0x3fb0da2900c3b814, 0xbfcb24e7c611c0f4, + 0x3ff39887c0a08d49, 0xbfd93461d3a37236, 0x3fc6aec28545a7b7, + 0xbfef12b1874df4ac, 0xbfbfbaf78b1f72ad, 0x3fef0ba929634dea, + 0xbfa927534106be44); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.2646 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x343c); + // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, + // -0.1786, -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, + // 0.0567, -0.9897, -0.5400, -0.4187 + VLOAD_16(v16, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, -0.8374, + // -0.8623, -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, + // -0.6514, -0.5703, -0.2585, -0.3320 + VLOAD_16(v8, 0xba4f, 
0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_16)); + // -0.7886, 0.3586, -0.8345, -0.8867, 0.5796, 0.5200, + // -0.8623, 0.2781, -0.2690, -0.2883, 0.2126, -0.1416, + // -0.6514, 1.1406, -0.2585, 0.5068 + VCMP_U16(10, v8, 0xba4f, 0x35bd, 0xbaad, 0xbb17, 0x38a3, 0x3829, 0xbae6, + 0x3473, 0xb44e, 0xb49d, 0x32ce, 0xb088, 0xb936, 0x3c90, 0xb423, + 0x380d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.13809182 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe0d67f1); + // -0.16977388, -0.30800357, -0.37010264, -0.92290556, + // 0.55768263, 0.47349435, 0.77556002, 0.16363664, + // 0.80314618, -0.48171839, -0.60694915, 0.16937894, + // 0.86316317, 0.00897404, -0.96310323, -0.27890080 + VLOAD_32(v8, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 0x3f4d9afd, 0xbef6a3cb, + 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 0x3c1307df, 0xbf768def, + 0xbe8ecc16); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.33133313, -0.48972869, 0.95656961, -0.89211702, + // 0.72045243, -0.36672497, 0.69402671, 0.44954479, + // -0.77024877, -0.83221292, 0.37576449, -0.77536738, + // -0.55040795, -0.71568310, -0.75874990, 0.91956782 + VLOAD_32(v4, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, + 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, + 0x3f6b68cc); + asm volatile("vfnmadd.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.33133313, 0.24037606, 0.95656961, 0.79971153, + // 0.72045243, -0.52413607, 0.69402671, -0.10155818, + // -0.77024877, 0.36679661, 0.37576449, -0.27645081, + // -0.55040795, -0.10780402, -0.75874990, 0.40588558 + VCMP_U32(11, v4, 0xbea9a47f, 0x3e762524, 0x3f74e1bf, 0x3f4cb9e5, 0x3f386f92, + 0xbf062dc8, 0x3f31abbc, 0xbdcffdbd, 0xbf452f06, 0x3ebbccc3, + 0x3ec06434, 0xbe8d8af6, 0xbf0ce789, 0xbddcc85b, 0xbf423d6f, + 
0x3ecfd03d); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8978909040536565 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); + // 0.4119623576675431, -0.1190899643735133, + // 0.9903323592718865, 0.5311038519754858, + // 0.1686986553141236, -0.8788301781199843, + // 0.1880579223718752, 0.7610824660598337, + // -0.0872931389118274, -0.6855627317033812, + // -0.0181686933036735, -0.9796673648941667, + // 0.2148282430178909, -0.1529278220414154, + // -0.7708574130314993, -0.4104905538508556 + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, + 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, + 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, + 0x3fcb7f7deb026e00, 0xbfc393238d287f60, 0xbfe8aadd2a5b2eba, + 0xbfda457a2c06ce78); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.5050016609492949, 0.8257750946258060, + // -0.2631016891694440, 0.8041841986447893, + // -0.0322547653971421, -0.3994438840519345, + // -0.6154540433263920, -0.9209485498858390, + // 0.3334000822950238, -0.6004917796663505, + // 0.4588428764280068, 0.8937156106780619, + // 0.8421999503441004, 0.3083609158934253, + // -0.2219824502919918, 0.5118870280625194 + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, + 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, + 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, + 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, + 0x3fe06160e798ce12); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.5050016609492949, -0.6223659818850454, + // -0.2631016891694440, -1.2531735291223209, + // -0.0322547653971421, 1.2374872082900796, + // -0.6154540433263920, 0.0658288599840662, + // 0.3334000822950238, 1.2247388386247897, 0.4588428764280068, + // 0.1772082472555760, 0.8421999503441004, + // -0.1239466395049457, 
-0.2219824502919918, + // -0.0491281525495393 + VCMP_U64(12, v8, 0x3fe028f93e467e2c, 0xbfe3ea6c104adab7, 0xbfd0d6a877a053e0, + 0xbff40cffafbcb13e, 0xbfa083b2550ab080, 0x3ff3ccbf630d300f, + 0xbfe3b1ccad88e3e8, 0x3fb0da2900c3b814, 0x3fd5566d4c7c36a8, + 0x3ff39887c0a08d49, 0x3fdd5dae8310b1e8, 0x3fc6aec28545a7b7, + 0x3feaf34d4f6d76aa, 0xbfbfbaf78b1f72ad, 0xbfcc69ebc2252060, + 0xbfa927534106be44); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c new file mode 100644 index 000000000..27e606e82 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c @@ -0,0 +1,455 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, + // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, + // -0.3589, -0.3914 + VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); + // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, + // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, + // -0.2036, -0.8823, 0.2603 + VLOAD_16(v6, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); + // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, + // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, + // 0.2129, 0.9072 + VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + 0x39f6, 0xb822, 
0x2c77, 0x37aa, 0xac5b, 0xbbcc, 0x32d0, 0x3b42); + asm volatile("vfnmsac.vv v2, v4, v6"); + // -0.2883, 0.5967, 0.7969, -0.6475, 0.8584, -0.4927, -0.9761, + // -0.3911, 0.4971, 0.1733, 0.1978, 0.1917, -0.3401, -1.0459, + // -0.1038, 1.0088 + VCMP_U16(1, v2, 0xb49d, 0x38c6, 0x3a60, 0xb92e, 0x3ade, 0xb7e2, 0xbbcf, + 0xb642, 0x37f4, 0x318a, 0x3254, 0x3223, 0xb570, 0xbc2f, 0xaea4, + 0x3c09); + + VSET(16, e32, m4); + // 0.11577118, -0.10074481, 0.13861528, 0.44782066, + // 0.42196107, -0.67597556, 0.34948668, -0.87903690, + // -0.34136006, -0.19722189, 0.76997000, -0.68663412, + // 0.45603558, 0.60629857, -0.86984915, -0.08019307 + VLOAD_32(v8, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, + 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, + 0xbda43c43); + // -0.38970658, 0.40460527, 0.69067985, -0.98108912, + // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, + // -0.02819708, -0.46859986, 0.87238866, 0.46812481, + // 0.49922746, 0.97036403, 0.04279163 + VLOAD_32(v12, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 0xbce6fd92, + 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, + 0x3d2f4647); + // 0.79804420, -0.70010293, -0.51047552, 0.38566175, + // 0.15318950, 0.15531392, -0.20705318, -0.82493448, + // 0.12047531, 0.57526720, 0.23939800, -0.19725421, + // 0.15403098, 0.03931713, -0.45930895, -0.15395784 + VLOAD_32(v4, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, + 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, + 0xbe1da720); + asm volatile("vfnmsac.vv v4, v8, v12"); + // 0.84316099, -0.65934104, -0.60621428, 0.82501376, + // -0.04721911, -0.07639174, -0.01671545, -1.62039506, + // 0.32563519, 0.56970614, 0.60020584, 0.40175763, + // -0.05945060, -0.26336378, 0.38476136, -0.15052626 + VCMP_U32(2, v4, 
0x3f57d966, 0xbf28ca93, 0xbf1b30dc, 0x3f53341a, 0xbd4168d3, + 0xbd9c7345, 0xbc88eed4, 0xbfcf691b, 0x3ea6b9a8, 0x3f11d843, + 0x3f19a717, 0x3ecdb32c, 0xbd738277, 0xbe86d79e, 0x3ec4ff71, + 0xbe1a238e); + + VSET(16, e64, m8); + // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, + // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, + // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, + // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, + // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, + // -0.8801262923106541 + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, + 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, + 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, + 0xbfea5962d2e21a3e, 0xbfe48e8aaabdfd5e, 0xbfd86ea1e6b05c10, + 0xbfec29fe9d3a5e2c); + // 0.6809772463364707, -0.3512739833826983, -0.3746023351803702, + // -0.7912172181005324, 0.8292434726428350, 0.4103374079106952, + // -0.0850673796598582, -0.5834949864830523, + // -0.9215678788036654, 0.4412210589054084, 0.3537359089001260, + // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, + // -0.4825773777931026, 0.8989772522533539 + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, + 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, + 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, + 0x3fcdf8e170bf19c0, 0x3fae67eba9479c60, 0xbfdee28c39da4ac0, + 0x3fecc46bf148d5ca); + // 0.2213384305747967, -0.6962211546566610, -0.0896076892809434, + // -0.1334142611967066, 0.1988436916560323, + // -0.3391391007320459, -0.6137202819751713, 0.6759552396290200, + // -0.7798663937316326, -0.1025181838739857, 0.5296250728149803, + // 0.8832422045338422, 0.8373555508937671, -0.8622529212135799, + // 0.4241832213372883, 0.7769982087360683 + 
VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, + 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, + 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, + 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, + 0x3fe8dd2b58f24dc8); + asm volatile("vfnmsac.vv v8, v16, v24"); + // 0.3425309161602823, -0.6567824407689892, -0.0958273275519495, + // -0.1665148733821233, 0.1317156015009752, 0.0709487030987733, + // -0.6319166041826899, 1.0958800635211576, + // -0.1817887834908719, -0.3650751413581792, 0.7905497455496533, + // 0.6171465501654385, 1.0301636939103409, -0.8241029625112244, + // 0.2399581166415165, 1.5682117246334322 + VCMP_U64(3, v8, 0x3fd5ec06cab1bfc9, 0xbfe5045c9bf61361, 0xbfb88823c5e61162, + 0xbfc5505bffbca57e, 0x3fc0dc0e8c68ea87, 0x3fb229b1b780ba72, + 0xbfe438a92b9872ef, 0x3ff188b9889296a1, 0xbfc744dad7efbbd3, + 0xbfd75d64202dd23c, 0x3fe94c2efadfe675, 0x3fe3bfaa1f3997d0, + 0x3ff07b8cecedf189, 0xbfea5f0d2d10b7b0, 0x3fceb6f293b149df, + 0x3ff917652d6811d0); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, + // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, + // -0.3589, -0.3914 + VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); + // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, + // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, + // -0.2036, -0.8823, 0.2603 + VLOAD_16(v6, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, + // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, + // 
0.2129, 0.9072 + VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + 0x39f6, 0xb822, 0x2c77, 0x37aa, 0xac5b, 0xbbcc, 0x32d0, 0x3b42); + asm volatile("vfnmsac.vv v2, v4, v6, v0.t"); + // -0.2739, 0.5967, 0.5264, -0.6475, 0.8877, -0.4927, -0.7563, + // -0.3911, 0.7451, 0.1733, 0.0698, 0.1917, -0.0681, -1.0459, + // 0.2129, 1.0088 + VCMP_U16(4, v2, 0xb462, 0x38c6, 0x3836, 0xb92e, 0x3b1a, 0xb7e2, 0xba0d, + 0xb642, 0x39f6, 0x318a, 0x2c77, 0x3223, 0xac5b, 0xbc2f, 0x32d0, + 0x3c09); + + VSET(16, e32, m4); + // 0.11577118, -0.10074481, 0.13861528, 0.44782066, + // 0.42196107, -0.67597556, 0.34948668, -0.87903690, + // -0.34136006, -0.19722189, 0.76997000, -0.68663412, + // 0.45603558, 0.60629857, -0.86984915, -0.08019307 + VLOAD_32(v8, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, + 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, + 0xbda43c43); + // -0.38970658, 0.40460527, 0.69067985, -0.98108912, + // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, + // -0.02819708, -0.46859986, 0.87238866, 0.46812481, + // 0.49922746, 0.97036403, 0.04279163 + VLOAD_32(v12, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 0xbce6fd92, + 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, + 0x3d2f4647); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.79804420, -0.70010293, -0.51047552, 0.38566175, + // 0.15318950, 0.15531392, -0.20705318, -0.82493448, + // 0.12047531, 0.57526720, 0.23939800, -0.19725421, + // 0.15403098, 0.03931713, -0.45930895, -0.15395784 + VLOAD_32(v4, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, + 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, + 0xbe1da720); + asm volatile("vfnmsac.vv v4, v8, v12, v0.t"); + // 0.79804420, -0.65934104, -0.51047552, 0.82501376, + // 0.15318950, -0.07639174, 
-0.20705318, -1.62039506, + // 0.12047531, 0.56970614, 0.23939800, 0.40175763, + // 0.15403098, -0.26336378, -0.45930895, -0.15052626 + VCMP_U32(5, v4, 0x3f4c4ca0, 0xbf28ca93, 0xbf02ae86, 0x3f53341a, 0x3e1cddb5, + 0xbd9c7345, 0xbe5405c0, 0xbfcf691b, 0x3df6bbc3, 0x3f11d843, + 0x3e7524c0, 0x3ecdb32c, 0x3e1dba4c, 0xbe86d79e, 0xbeeb2a8b, + 0xbe1a238e); + + VSET(16, e64, m8); + // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, + // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, + // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, + // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, + // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, + // -0.8801262923106541 + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, + 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, + 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, + 0xbfea5962d2e21a3e, 0xbfe48e8aaabdfd5e, 0xbfd86ea1e6b05c10, + 0xbfec29fe9d3a5e2c); + // 0.6809772463364707, -0.3512739833826983, -0.3746023351803702, + // -0.7912172181005324, 0.8292434726428350, 0.4103374079106952, + // -0.0850673796598582, -0.5834949864830523, + // -0.9215678788036654, 0.4412210589054084, 0.3537359089001260, + // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, + // -0.4825773777931026, 0.8989772522533539 + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, + 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, + 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, + 0x3fcdf8e170bf19c0, 0x3fae67eba9479c60, 0xbfdee28c39da4ac0, + 0x3fecc46bf148d5ca); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.2213384305747967, -0.6962211546566610, -0.0896076892809434, + // -0.1334142611967066, 0.1988436916560323, + // -0.3391391007320459, -0.6137202819751713, 0.6759552396290200, + // 
-0.7798663937316326, -0.1025181838739857, 0.5296250728149803, + // 0.8832422045338422, 0.8373555508937671, -0.8622529212135799, + // 0.4241832213372883, 0.7769982087360683 + VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, + 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, + 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, + 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, + 0x3fe8dd2b58f24dc8); + asm volatile("vfnmsac.vv v8, v16, v24, v0.t"); + // 0.2213384305747967, -0.6567824407689892, -0.0896076892809434, + // -0.1665148733821233, 0.1988436916560323, 0.0709487030987733, + // -0.6137202819751713, 1.0958800635211576, + // -0.7798663937316326, -0.3650751413581792, 0.5296250728149803, + // 0.6171465501654385, 0.8373555508937671, -0.8241029625112244, + // 0.4241832213372883, 1.5682117246334322 + VCMP_U64(6, v8, 0x3fcc54d154555708, 0xbfe5045c9bf61361, 0xbfb6f0878eee8940, + 0xbfc5505bffbca57e, 0x3fc973b5c856db48, 0x3fb229b1b780ba72, + 0xbfe3a398b77f3552, 0x3ff188b9889296a1, 0xbfe8f4aa5e0a7552, + 0xbfd75d64202dd23c, 0x3fe0f2b047dc3902, 0x3fe3bfaa1f3997d0, + 0x3feacb9dde46cf34, 0xbfea5f0d2d10b7b0, 0x3fdb25d161c9f510, + 0x3ff917652d6811d0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.4771 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb7a2); + // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, + // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, -0.7607, + // -0.7788, -0.5264 + VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); + // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, + // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, + // -0.7803, -0.7666, -0.1387 + VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + 0xba1e, 
0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); + asm volatile("vfnmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.2759, 1.3447, 0.5684, 0.6260, 0.4219, 0.8594, 1.0244, + // -0.7632, -1.0381, -0.2749, 0.1438, -0.6406, 0.5117, -1.1426, + // -1.1387, -0.3899 + VCMP_U16(7, v2, 0xb46a, 0x3d61, 0x388c, 0x3902, 0x36c0, 0x3ae0, 0x3c19, + 0xba1b, 0xbc27, 0xb466, 0x309a, 0xb920, 0x3818, 0xbc93, 0xbc8d, + 0xb63d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.73549986 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf3c49b8); + // 0.74252719, 0.73023552, 0.75118375, 0.04020444, + // -0.77184784, -0.41120139, -0.57577437, -0.15976480, + // -0.05041125, 0.42673740, 0.88473374, -0.49891368, + // -0.84324479, -0.26009968, -0.01877740, -0.13754985 + VLOAD_32(v8, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, + 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, + 0xbe0cd9de); + // 0.89538908, 0.68592542, 0.67501348, 0.08327232, + // 0.28473541, -0.93230879, -0.77235961, -0.92498165, + // -0.55227244, 0.97729182, 0.28253901, 0.45306230, + // -0.50359881, 0.40307203, -0.65891176, -0.59297264 + VLOAD_32(v4, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, + 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, + 0xbf17cd0e); + asm volatile("vfnmsac.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 1.44151771, 1.22301352, 1.22750902, 0.11284268, + // -0.28295860, -1.23474741, -1.19584155, -1.04248869, + // -0.58934993, 1.29115713, 0.93326056, 0.08611137, + // -1.12380528, 0.21176875, -0.67272252, -0.69414055 + VCMP_U32(8, v4, 0x3fb883a7, 0x3f9c8bb5, 0x3f9d1f04, 0x3de71a10, 0xbe90dff2, + 0xbf9e0c34, 0xbf991156, 0xbf857045, 0xbf16dfa3, 0x3fa544a3, + 0x3f6eea2a, 0x3db05b27, 0xbf8fd8da, 0x3e58d9e8, 0xbf2c378b, + 0xbf31b332); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5178244899339752 + 
BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); + // 0.8646249694399413, 0.0049558737185129, 0.9278624830778543, + // -0.8820434014846885, 0.9252937592855630, + // -0.0640564429957495, -0.9483662154664578, + // 0.6036726974274773, 0.6321354499926264, 0.8250130840364809, + // 0.1494192541908572, 0.0196333207724690, + // -0.2272946521816408, 0.9289723385226867, + // -0.5162193242581365, -0.3917544955022987 + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, + 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, + 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, + 0xbfcd17fdbcd68200, 0x3fedba24329afe9c, 0xbfe084de63680fb2, + 0xbfd91281728f9314); + // 0.3888129269587612, -0.8225750100551035, 0.8430062678626642, + // -0.6316792665412014, -0.6696614660277347, + // 0.6130084754374303, -0.2295750183537659, 0.1572393304616742, + // -0.7542147373874082, 0.6149250820738357, + // 0.1236692515687874, 0.9290168852760794, + // -0.0433411597165929, -0.8422695068160440, + // 0.6519328829008422, -0.3347506024828231 + VLOAD_64(v8, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, + 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, + 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, + 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, + 0xbfd56c8dca7eb8ac); + asm volatile("vfnmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.0589110568256553, -0.8251412828355696, 0.3625363508340025, + // -0.1749355920677641, -1.1488012349688719, 0.6461784703586890, + // 0.2615122334407671, -0.1553571761707763, -1.0815499543490239, + // 0.1877131026437891, 0.0462963024810918, 0.9188502709613655, + // 0.0743575776140855, -1.3233141341743264, 0.9192438911788732, + // -0.1318905306700034 + VCMP_U64(9, v8, 0xbfae29970ce0c2e6, 0xbfea678eb10b76d9, 0x3fd733cbaa9c5dc5, + 0xbfc6644a1b6b315b, 
0xbff2617d675cbb41, 0x3fe4ad7e78b23c6a, + 0x3fd0bc9dce8872e3, 0xbfc3e2be736d1b43, 0xbff14e07532eb5a9, + 0x3fc806fba27160c3, 0x3fa7b4262229fd93, 0x3fed6738aef664ca, + 0x3fb30919240ff186, 0xbff52c4b7109d007, 0x3fed6a722a352743, + 0xbfc0e1c9f5f09ba4); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.4771 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb7a2); + // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, + // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, + // -0.7607, -0.7788, -0.5264 + VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, + // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, + // -0.7803, -0.7666, -0.1387 + VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + 0xba1e, 0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); + asm volatile("vfnmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.0186, 1.3447, 0.6201, 0.6260, 0.2512, 0.8594, 0.9424, + // -0.7632, -0.7646, -0.2749, -0.2507, -0.6406, 0.3452, + // -1.1426, -0.7666, -0.3899 + VCMP_U16(10, v2, 0x24c1, 0x3d61, 0x38f6, 0x3902, 0x3405, 0x3ae0, 0x3b8a, + 0xba1b, 0xba1e, 0xb466, 0xb403, 0xb920, 0x3586, 0xbc93, 0xba22, + 0xb63d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.73549986 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf3c49b8); + // 0.74252719, 0.73023552, 0.75118375, 0.04020444, + // -0.77184784, -0.41120139, -0.57577437, -0.15976480, + // -0.05041125, 0.42673740, 0.88473374, -0.49891368, + // -0.84324479, -0.26009968, -0.01877740, -0.13754985 + VLOAD_32(v8, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, + 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, + 0xbe0cd9de); + VLOAD_8(v0, 
0xAA, 0xAA); + // 0.89538908, 0.68592542, 0.67501348, 0.08327232, + // 0.28473541, -0.93230879, -0.77235961, -0.92498165, + // -0.55227244, 0.97729182, 0.28253901, 0.45306230, + // -0.50359881, 0.40307203, -0.65891176, -0.59297264 + VLOAD_32(v4, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, + 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, + 0xbf17cd0e); + asm volatile("vfnmsac.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.89538908, 1.22301352, 0.67501348, 0.11284268, + // 0.28473541, -1.23474741, -0.77235961, -1.04248869, + // -0.55227244, 1.29115713, 0.28253901, 0.08611137, + // -0.50359881, 0.21176875, -0.65891176, -0.69414055 + VCMP_U32(11, v4, 0x3f653838, 0x3f9c8bb5, 0x3f2ccdaf, 0x3de71a10, 0x3e91c8d7, + 0xbf9e0c34, 0xbf45b95c, 0xbf857045, 0xbf0d61ba, 0x3fa544a3, + 0x3e90a8f4, 0x3db05b27, 0xbf00ebda, 0x3e58d9e8, 0xbf28ae71, + 0xbf31b332); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5178244899339752 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); + // 0.8646249694399413, 0.0049558737185129, + // 0.9278624830778543, -0.8820434014846885, + // 0.9252937592855630, -0.0640564429957495, + // -0.9483662154664578, 0.6036726974274773, + // 0.6321354499926264, 0.8250130840364809, + // 0.1494192541908572, 0.0196333207724690, + // -0.2272946521816408, 0.9289723385226867, + // -0.5162193242581365, -0.3917544955022987 + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, + 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, + 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, + 0xbfcd17fdbcd68200, 0x3fedba24329afe9c, 0xbfe084de63680fb2, + 0xbfd91281728f9314); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.3888129269587612, -0.8225750100551035, + // 0.8430062678626642, -0.6316792665412014, + // -0.6696614660277347, 0.6130084754374303, + // -0.2295750183537659, 0.1572393304616742, 
+ // -0.7542147373874082, 0.6149250820738357, + // 0.1236692515687874, 0.9290168852760794, + // -0.0433411597165929, -0.8422695068160440, + // 0.6519328829008422, -0.3347506024828231 + VLOAD_64(v8, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, + 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, + 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, + 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, + 0xbfd56c8dca7eb8ac); + asm volatile("vfnmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.3888129269587612, -0.8251412828355696, 0.8430062678626642, + // -0.1749355920677641, -0.6696614660277347, + // 0.6461784703586890, -0.2295750183537659, + // -0.1553571761707763, -0.7542147373874082, + // 0.1877131026437891, 0.1236692515687874, 0.9188502709613655, + // -0.0433411597165929, -1.3233141341743264, + // 0.6519328829008422, -0.1318905306700034 + VCMP_U64(12, v8, 0x3fd8e24f9d6331d4, 0xbfea678eb10b76d9, 0x3feaf9e847d9618e, + 0xbfc6644a1b6b315b, 0xbfe56ddde1ff608c, 0x3fe4ad7e78b23c6a, + 0xbfcd62b6d5e76d58, 0xbfc3e2be736d1b43, 0xbfe82286f1e7af48, + 0x3fc806fba27160c3, 0x3fbfa8c9bf023cb0, 0x3fed6738aef664ca, + 0xbfa630cfff1b61a0, 0xbff52c4b7109d007, 0x3fe4dca25967e02c, + 0xbfc0e1c9f5f09ba4); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c new file mode 100644 index 000000000..dcbb1f51d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c @@ -0,0 +1,454 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, + // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, + // 0.1584, 0.1943 + VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); + // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, + // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, + // 0.0182, 0.0447 + VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + 0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); + // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, + // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, + // 0.3440, 0.1991 + VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); + asm volatile("vfnmsub.vv v2, v4, v6"); + // -0.3247, 0.9438, 0.6572, -0.8169, 0.6880, 0.2991, 0.5474, + // -0.4136, -0.0534, 0.4014, -0.0950, 0.9141, -0.2217, 0.9692, + // -0.0363, 0.0060 + VCMP_U16(1, v2, 0xb532, 0x3b8d, 0x3942, 0xba89, 0x3981, 0x34c9, 0x3861, + 0xb69e, 0xaad5, 0x366b, 0xae14, 0x3b50, 0xb318, 0x3bc1, 0xa8a7, + 0x1e29); + + VSET(16, e32, m4); + // 0.76259303, -0.43966120, -0.19390504, -0.57240725, + // -0.57148474, -0.93710214, 0.24273214, 0.44242114, + // -0.93160200, -0.56412256, -0.75430351, -0.02741535, + // -0.60542876, -0.93627954, 0.02798123, 0.23119579 + VLOAD_32(v8, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, + 0xbf411a09, 0xbce09629, 0xbf1afd61, 0xbf6fb004, 0x3ce538e6, + 0x3e6cbe97); + // 0.48736989, 0.19715627, -0.47227743, 0.13752034, + // -0.16710435, 
0.84761631, 0.37147006, 0.25389814, + // -0.44707820, 0.38169226, -0.82191414, -0.81056035, + // 0.29047397, -0.46743703, -0.91869444, -0.08079135 + VLOAD_32(v12, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, + 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, + 0xbda575f0); + // -0.48655373, -0.87417608, 0.17854533, 0.67417324, + // 0.46947387, 0.29113689, -0.11920074, 0.63394654, + // -0.82611400, -0.84088647, -0.13328743, 0.29885510, + // 0.91797447, -0.15480036, 0.76857966, 0.16230854 + VLOAD_32(v4, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, + 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, + 0x3e263436); + asm volatile("vfnmsub.vv v4, v8, v12"); + // 0.85841238, -0.18718503, -0.43765658, 0.52342200, + // 0.10119282, 1.12044132, 0.40040392, -0.02657321, + // -1.21668768, -0.09267077, -0.92245328, -0.80236715, + // 0.84624207, -0.61237341, -0.94020027, -0.11831641 + VCMP_U32(2, v4, 0x3f5bc0ea, 0xbe3fad70, 0xbee01486, 0x3f05fefc, 0x3dcf3e2c, + 0x3f8f6a9f, 0x3ecd01be, 0xbcd9b00d, 0xbf9bbc6c, 0xbdbdca2c, + 0xbf6c25e6, 0xbf4d67ef, 0x3f58a353, 0xbf1cc481, 0xbf70b0f7, + 0xbdf24fdf); + + VSET(16, e64, m8); + // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, + // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, + // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, + // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, + // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, + // -0.0660417836134264 + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, + 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, + 0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, + 0x3fa9e3494f719000, 0x3fe73103b4d74f92, 0xbfebd8eef827d60a, + 0xbfb0e81d44ca0760); 
+ // 0.6650460871127466, -0.8389896062690501, 0.3260860096573337, + // 0.4421797679090849, -0.1921872051427089, -0.1798768047606598, + // -0.5065656464186716, 0.8248933299429206, + // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, + // -0.2642287948226270, 0.2403355182026823, + // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, + 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, + 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, + 0x3fcec350735fb5b8, 0xbfb4d70fa1bd62a0, 0xbfe7cc66d19c4666, + 0xbfe3ec8301600d10); + // -0.2122847293404504, 0.9074328134093839, -0.4150374170703475, + // -0.4511563805942409, -0.9126942371441604, -0.8237861842027401, + // -0.0636244117792013, 0.7124530373845765, -0.4126670585839094, + // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, + // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, + // 0.2399358773396087 + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, + 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, + 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, + 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, + 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, + 0x3fceb6380527c498); + asm volatile("vfnmsub.vv v8, v16, v24"); + // 0.6372868977272925, -1.6760208622190165, 0.6845034040552052, + // 0.3615690295631244, -0.1182120133317999, 0.1658155255420807, + // -0.4486624012573315, 1.2137228688596235, + // -0.5444086454629855, -0.2184785266689677, 0.9497556677330713, + // -0.5686128899794636, 0.2029269193361774, + // -0.4213201787237407, -0.9549780451092895, -0.6067752686868729 + VCMP_U64(3, v8, 0x3fe464a77dfd0e7c, 0xbffad0fb406a4f74, 0x3fe5e773aecd5e74, + 0x3fd723f26d4e15bc, 0xbfbe43247b412024, 0x3fc5397171afa72c, + 0xbfdcb6e281161599, 0x3ff36b68abc28cd2, 
0xbfe16bcbadfd8ab4, + 0xbfcbf71ab775f310, 0x3fee6465ff835579, 0xbfe23213a8d1778a, + 0x3fc9f982610371db, 0xbfdaf6e8e930da95, 0xbfee8f2e1e048ea3, + 0xbfe36ab3f7e103f3); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, + // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, + // 0.1584, 0.1943 + VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); + // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, + // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, + // 0.0182, 0.0447 + VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + 0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, + // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, + // 0.3440, 0.1991 + VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); + asm volatile("vfnmsub.vv v2, v4, v6, v0.t"); + // 0.5835, 0.9438, -0.3459, -0.8169, -0.4866, 0.2991, 0.0685, + // -0.4136, -0.1429, 0.4014, -0.6416, 0.9141, -0.1147, 0.9692, + // 0.3440, 0.0060 + VCMP_U16(4, v2, 0x38ab, 0x3b8d, 0xb589, 0xba89, 0xb7c9, 0x34c9, 0x2c62, + 0xb69e, 0xb093, 0x366b, 0xb922, 0x3b50, 0xaf57, 0x3bc1, 0x3581, + 0x1e29); + + VSET(16, e32, m4); + // 0.76259303, -0.43966120, -0.19390504, -0.57240725, + // -0.57148474, -0.93710214, 0.24273214, 0.44242114, + // -0.93160200, -0.56412256, -0.75430351, -0.02741535, + // -0.60542876, -0.93627954, 0.02798123, 0.23119579 + VLOAD_32(v8, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, + 0xbf411a09, 0xbce09629, 0xbf1afd61, 
0xbf6fb004, 0x3ce538e6, + 0x3e6cbe97); + // 0.48736989, 0.19715627, -0.47227743, 0.13752034, + // -0.16710435, 0.84761631, 0.37147006, 0.25389814, + // -0.44707820, 0.38169226, -0.82191414, -0.81056035, + // 0.29047397, -0.46743703, -0.91869444, -0.08079135 + VLOAD_32(v12, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, + 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, + 0xbda575f0); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.48655373, -0.87417608, 0.17854533, 0.67417324, + // 0.46947387, 0.29113689, -0.11920074, 0.63394654, + // -0.82611400, -0.84088647, -0.13328743, 0.29885510, + // 0.91797447, -0.15480036, 0.76857966, 0.16230854 + VLOAD_32(v4, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, + 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, + 0x3e263436); + asm volatile("vfnmsub.vv v4, v8, v12, v0.t"); + // -0.48655373, -0.18718503, 0.17854533, 0.52342200, + // 0.46947387, 1.12044132, -0.11920074, -0.02657321, + // -0.82611400, -0.09267077, -0.13328743, -0.80236715, + // 0.91797447, -0.61237341, 0.76857966, -0.11831641 + VCMP_U32(5, v4, 0xbef91d92, 0xbe3fad70, 0x3e36d496, 0x3f05fefc, 0x3ef05ee1, + 0x3f8f6a9f, 0xbdf41f84, 0xbcd9b00d, 0xbf537c35, 0xbdbdca2c, + 0xbe087c80, 0xbf4d67ef, 0x3f6b0060, 0xbf1cc481, 0x3f44c1a3, + 0xbdf24fdf); + + VSET(16, e64, m8); + // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, + // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, + // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, + // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, + // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, + // -0.0660417836134264 + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, + 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, + 
0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, + 0x3fa9e3494f719000, 0x3fe73103b4d74f92, 0xbfebd8eef827d60a, + 0xbfb0e81d44ca0760); + // 0.6650460871127466, -0.8389896062690501, 0.3260860096573337, + // 0.4421797679090849, -0.1921872051427089, -0.1798768047606598, + // -0.5065656464186716, 0.8248933299429206, + // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, + // -0.2642287948226270, 0.2403355182026823, + // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, + 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, + 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, + 0x3fcec350735fb5b8, 0xbfb4d70fa1bd62a0, 0xbfe7cc66d19c4666, + 0xbfe3ec8301600d10); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2122847293404504, 0.9074328134093839, -0.4150374170703475, + // -0.4511563805942409, -0.9126942371441604, -0.8237861842027401, + // -0.0636244117792013, 0.7124530373845765, -0.4126670585839094, + // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, + // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, + // 0.2399358773396087 + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, + 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, + 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, + 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, + 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, + 0x3fceb6380527c498); + asm volatile("vfnmsub.vv v8, v16, v24, v0.t"); + // -0.2122847293404504, -1.6760208622190165, -0.4150374170703475, + // 0.3615690295631244, -0.9126942371441604, 0.1658155255420807, + // -0.0636244117792013, 1.2137228688596235, -0.4126670585839094, + // -0.2184785266689677, -0.3240264495739638, -0.5686128899794636, + // 0.7398533272929233, -0.4213201787237407, -0.2427822500985419, + // -0.6067752686868729 + 
VCMP_U64(6, v8, 0xbfcb2c2560fa8d98, 0xbffad0fb406a4f74, 0xbfda8ff9193bbdb8, + 0x3fd723f26d4e15bc, 0xbfed34ca8b7904d0, 0x3fc5397171afa72c, + 0xbfb049b07fd1a3e0, 0x3ff36b68abc28cd2, 0xbfda692318304834, + 0xbfcbf71ab775f310, 0xbfd4bcd96efd6300, 0xbfe23213a8d1778a, + 0x3fe7ace0e291ea9e, 0xbfdaf6e8e930da95, 0xbfcf137d201c7be0, + 0xbfe36ab3f7e103f3); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.1346 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb04f); + // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, -0.8701, + // 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, -0.8896, -0.5981, + // -0.3223, 0.6924 + VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); + // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, + // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, + // -0.8853, 0.1761 + VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); + asm volatile("vfnmsub.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.1903, 1.1211, 0.0879, 0.4556, 0.1329, -0.1755, -0.7959, + // 0.9360, 0.6831, -0.5493, 0.8301, 0.8687, -0.8677, -0.4956, + // -0.4414, 0.7163 + VCMP_U16(7, v2, 0xb217, 0x3c7c, 0x2da0, 0x374b, 0x3041, 0xb19e, 0xba5e, + 0x3b7d, 0x3977, 0xb865, 0x3aa5, 0x3af3, 0xbaf1, 0xb7ee, 0xb710, + 0x39bb); + + VSET(16, e32, m4); + double dscalar_32; + // -0.16110219 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe24f7f9); + // -0.31537205, -0.17563045, -0.79069936, 0.22939304, + // -0.89379781, 0.26157290, 0.56702632, -0.11594663, + // 0.09605245, 0.45930776, -0.76518077, -0.26341528, 0.74385208, + // 0.89362013, -0.21185355, 0.23924881 + VLOAD_32(v8, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, + 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 
0x3f64c44a, 0xbe58f023, + 0x3e74fda4); + // 0.31856158, 0.48641542, 0.57264513, 0.30210373, + // -0.19719712, 0.85649359, 0.36901370, -0.78377151, + // 0.22567192, -0.75179213, -0.65690833, 0.11298654, + // -0.64884853, -0.48376039, -0.11539485, -0.42667609 + VLOAD_32(v4, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, + 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, + 0xbeda754a); + asm volatile("vfnmsub.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.26405108, -0.09726786, -0.69844496, 0.27806261, + // -0.92556667, 0.39955589, 0.62647521, -0.24221393, + // 0.13240869, 0.33819240, -0.87101012, -0.24521290, 0.63932115, + // 0.81568527, -0.23044391, 0.17051035 + VCMP_U32(8, v4, 0xbe8731b4, 0xbdc7345f, 0xbf32cd4a, 0x3e8e5e39, 0xbf6cf1f1, + 0x3ecc9297, 0x3f2060ae, 0xbe7806ee, 0x3e079625, 0x3ead278e, + 0xbf5efa85, 0xbe7b1917, 0x3f23aa8d, 0x3f50d0c0, 0xbe6bf97d, + 0x3e2e9a44); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5849101968457469 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); + // -0.7607808895269514, -0.0192591699518767, 0.6815284686654297, + // -0.5163928614577513, -0.9560613023939111, + // -0.8652684824342871, 0.4588682754059621, + // -0.5708244737077264, -0.7636024500128011, 0.2236424444447431, + // 0.8245435877598175, 0.8527344486412596, -0.3097355632002228, + // 0.0764086736442742, 0.2567358761671383, 0.1904958118727702 + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, + 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, + 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, + 0xbfd3d2b51c969928, 0x3fb38f84d26ed230, 0x3fd06e5c4ff641c8, + 0x3fc8622ab1025ec0); + // 0.1182575129292827, -0.8756460666506833, + // -0.3686593299789440, 0.2802900907620893, + // -0.5167592439660142, 0.3872686605057347, + // -0.0640775227939985, -0.4352087179743556, + // 
-0.1509314378482451, -0.9803534868251271, + // 0.9211862470421908, 0.7804942879773937, 0.5029472314120484, + // 0.1158347026033590, 0.7422982722940397, 0.0792254120441500 + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + 0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, + 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, + 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, + 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, + 0x3fb4481dd9bde340); + asm volatile("vfnmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.8299509146929066, 0.4929151432599784, 0.8971610699324351, + // -0.6803373936193172, -0.6538035512838903, -1.0917858708828851, + // 0.4963478718767876, -0.3162664568083610, -0.6753211129907725, + // 0.7970611954020426, 0.2857323586707750, 0.3962153810234212, + // -0.6039145273284674, 0.0086557749429749, -0.1774419523986262, + // 0.1441560605188410 + VCMP_U64(9, v8, 0xbfea8ef5387c85b1, 0x3fdf8bebf5004e06, 0x3fecb58b21d3556c, + 0xbfe5c552ecfae837, 0xbfe4ebf56cd8bc27, 0xbff177f4761ad476, + 0x3fdfc429dd49999d, 0xbfd43db5aa3413c6, 0xbfe59c3b05d2ff7f, + 0x3fe981867ae532b2, 0x3fd249705ff9984b, 0x3fd95b97c1eabccb, + 0xbfe353448f0e8fe6, 0x3f81ba1e7269b44b, 0xbfc6b66afb3ec852, + 0x3fc273b4aeb96c59); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.1346 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb04f); + // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, + // -0.8701, 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, + // -0.8896, -0.5981, -0.3223, 0.6924 + VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, + // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, + // -0.8853, 0.1761 + VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 
0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); + asm volatile("vfnmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -0.0126, 1.1211, 0.8945, 0.4556, -0.9272, -0.1755, 0.5527, + // 0.9360, 0.7207, -0.5493, -0.2810, 0.8687, 0.1648, -0.4956, + // -0.8853, 0.7163 + VCMP_U16(10, v2, 0xa27a, 0x3c7c, 0x3b28, 0x374b, 0xbb6b, 0xb19e, 0x386c, + 0x3b7d, 0x39c4, 0xb865, 0xb47f, 0x3af3, 0x3146, 0xb7ee, 0xbb15, + 0x39bb); + + VSET(16, e32, m4); + double dscalar_32; + // -0.16110219 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe24f7f9); + // -0.31537205, -0.17563045, -0.79069936, 0.22939304, + // -0.89379781, 0.26157290, 0.56702632, -0.11594663, + // 0.09605245, 0.45930776, -0.76518077, -0.26341528, + // 0.74385208, 0.89362013, -0.21185355, 0.23924881 + VLOAD_32(v8, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, + 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 0x3f64c44a, 0xbe58f023, + 0x3e74fda4); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.31856158, 0.48641542, 0.57264513, 0.30210373, + // -0.19719712, 0.85649359, 0.36901370, -0.78377151, + // 0.22567192, -0.75179213, -0.65690833, 0.11298654, + // -0.64884853, -0.48376039, -0.11539485, -0.42667609 + VLOAD_32(v4, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, + 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, + 0xbeda754a); + asm volatile("vfnmsub.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.31856158, -0.09726786, 0.57264513, 0.27806261, + // -0.19719712, 0.39955589, 0.36901370, -0.24221393, + // 0.22567192, 0.33819240, -0.65690833, -0.24521290, + // -0.64884853, 0.81568527, -0.11539485, 0.17051035 + VCMP_U32(11, v4, 0x3ea31a81, 0xbdc7345f, 0x3f1298df, 0x3e8e5e39, 0xbe49ee0b, + 0x3ecc9297, 0x3ebcef5d, 0xbe7806ee, 0x3e67168a, 0x3ead278e, + 0xbf282b25, 0xbe7b1917, 0xbf261af0, 0x3f50d0c0, 0xbdec5422, + 0x3e2e9a44); + + VSET(16, 
e64, m8); + double dscalar_64; + // 0.5849101968457469 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); + // -0.7607808895269514, -0.0192591699518767, + // 0.6815284686654297, -0.5163928614577513, + // -0.9560613023939111, -0.8652684824342871, + // 0.4588682754059621, -0.5708244737077264, + // -0.7636024500128011, 0.2236424444447431, + // 0.8245435877598175, 0.8527344486412596, + // -0.3097355632002228, 0.0764086736442742, + // 0.2567358761671383, 0.1904958118727702 + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, + 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, + 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, + 0xbfd3d2b51c969928, 0x3fb38f84d26ed230, 0x3fd06e5c4ff641c8, + 0x3fc8622ab1025ec0); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.1182575129292827, -0.8756460666506833, + // -0.3686593299789440, 0.2802900907620893, + // -0.5167592439660142, 0.3872686605057347, + // -0.0640775227939985, -0.4352087179743556, + // -0.1509314378482451, -0.9803534868251271, + // 0.9211862470421908, 0.7804942879773937, + // 0.5029472314120484, 0.1158347026033590, + // 0.7422982722940397, 0.0792254120441500 + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + 0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, + 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, + 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, + 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, + 0x3fb4481dd9bde340); + asm volatile("vfnmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.1182575129292827, 0.4929151432599784, + // -0.3686593299789440, -0.6803373936193172, + // -0.5167592439660142, -1.0917858708828851, + // -0.0640775227939985, -0.3162664568083610, + // -0.1509314378482451, 0.7970611954020426, + // 0.9211862470421908, 0.3962153810234212, 0.5029472314120484, + // 0.0086557749429749, 0.7422982722940397, 0.1441560605188410 + 
VCMP_U64(12, v8, 0x3fbe461fd6899df0, 0x3fdf8bebf5004e06, 0xbfd7981d4d67fd54, + 0xbfe5c552ecfae837, 0xbfe0894aae97abc2, 0xbff177f4761ad476, + 0xbfb0676270cf1540, 0xbfd43db5aa3413c6, 0xbfc351b8aabf8c50, + 0x3fe981867ae532b2, 0x3fed7a5b94924728, 0x3fd95b97c1eabccb, + 0x3fe01824cad0e968, 0x3f81ba1e7269b44b, 0x3fe7c0e84e6c2aa0, + 0x3fc273b4aeb96c59); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c new file mode 100644 index 000000000..357bcc2ef --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c @@ -0,0 +1,179 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -35.5312, -61.8125, -37.3125, 23.5938, 44.4688, 38.1250, + // -93.5000, -23.2031, -62.8125, 27.9844, -26.2344, -10.3594, + // -10.7109, -42.0938, 11.0625, 17.8281 + VLOAD_16(v2, 0xd071, 0xd3ba, 0xd0aa, 0x4de6, 0x518f, 0x50c4, 0xd5d8, 0xcdcd, + 0xd3da, 0x4eff, 0xce8f, 0xc92e, 0xc95b, 0xd143, 0x4988, 0x4c75); + // -17.4844 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xcc5f); + asm volatile("vfrdiv.vf v4, v2, %[A]" ::[A] "f"(dscalar_16)); + // 0.4922, 0.2830, 0.4685, -0.7412, -0.3931, -0.4585, 0.1870, + // 0.7534, 0.2783, -0.6250, 0.6665, 1.6875, 1.6328, 0.4153, + // -1.5801, -0.9810 + VCMP_U16(1, v4, 0x37df, 0x3486, 0x377f, 0xb9ed, 0xb64a, 0xb756, 0x31fb, + 0x3a07, 0x3474, 0xb8ff, 0x3954, 0x3ec0, 0x3e87, 0x36a5, 0xbe52, + 0xbbd8); + + VSET(16, e32, m4); + double dscalar_32; + // 981163.06250000, -831670.37500000, -85439.06250000, 
+ // 64225.75781250, -215361.43750000, -292944.75000000, + // 396490.21875000, 954345.93750000, 241910.40625000, + // -62372.83593750, 391838.50000000, 263890.03125000, + // 755217.06250000, -6653.31689453, 526939.25000000, + // -759232.75000000 + VLOAD_32(v4, 0x496f8ab1, 0xc94b0b66, 0xc7a6df88, 0x477ae1c2, 0xc852505c, + 0xc88f0a18, 0x48c19947, 0x4968fe9f, 0x486c3d9a, 0xc773a4d6, + 0x48bf53d0, 0x4880da41, 0x49386111, 0xc5cfea89, 0x4900a5b4, + 0xc9395c0c); + // -816463.43750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc94754f7); + asm volatile("vfrdiv.vf v8, v4, %[A]" ::[A] "f"(dscalar_32)); + // -0.83213836, 0.98171520, 9.55609035, + // -12.71239853, 3.79113102, 2.78709030, -2.05922723, + // -0.85552144, -3.37506533, 13.09004879, -2.08367324, + // -3.09395337, -1.08109772, 122.71524811, + // -1.54944515, 1.07537961 + VCMP_U32(2, v8, 0xbf550705, 0x3f7b51af, 0x4118e5bf, 0xc14b65fc, 0x4072a1e4, + 0x40325faf, 0xc003ca60, 0xbf5b0374, 0xc0580112, 0x415170d6, + 0xc0055ae7, 0xc0460354, 0xbf8a6168, 0x42f56e35, 0xbfc65437, + 0x3f89a60a); + + VSET(16, e64, m8); + double dscalar_64; + // -1436518.0384849868714809, 7616315.8933699131011963, + // -3920170.8619796745479107, -8788296.3276759665459394, + // -4048340.2138868225738406, 7863298.6869412772357464, + // 6686376.3073008488863707, 7004262.4451152756810188, + // 5533006.3396991230547428, 2002846.6050684414803982, + // -1239975.7277694121003151, 4133787.1656649876385927, + // 2465999.3703419454395771, -4337686.8389181373640895, + // -5741249.6292232554405928, 1762825.0474482532590628 + VLOAD_64(v8, 0xc135eb6609da26f0, 0x415d0dcef92cf900, 0xc14de8956e555998, + 0xc160c3290a7c524f, 0xc14ee2ea1b60a4b6, 0x415dff00abf6d88c, + 0x415981aa13aad12e, 0x415ab8199c7cc4c8, 0x41551b5395bda164, + 0x413e8f9e9ae5c3f0, 0xc132eba7ba4f18a0, 0x414f89cd953482a4, + 0x4142d067af675d68, 0xc1508c05b5b0d5b3, 0xc155e6b06845319e, + 0x413ae6090c259198); + // -181636.6228598635643721 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xc1062c24fb9df3c0); + asm 
volatile("vfrdiv.vf v16, v8, %[A]" ::[A] "f"(dscalar_64)); + // 0.1264422847425051, -0.0238483573164265, 0.0463338536137523, + // 0.0206680130126992, 0.0448669363895861, -0.0230992907800273, + // -0.0271651810355835, -0.0259322982659703, + // -0.0328278356662322, -0.0906892332144711, 0.1464840148013292, + // -0.0439395197625382, -0.0736563946626949, 0.0418740747326899, + // 0.0316371233773435, -0.1030372373723578 + VCMP_U64(3, v16, 0x3fc02f42c2e6795f, 0xbf986bb42af3122b, 0x3fa7b91223effbc4, + 0x3f9529fedfd9f42e, 0x3fa6f8cc90ee127a, 0xbf97a75729d81370, + 0xbf9bd130708d0e6e, 0xbf9a8dff13d98f11, 0xbfa0cecf612b7be2, + 0xbfb73768dac16680, 0x3fc2bffcfa7aafc4, 0xbfa67f3da0c39bb5, + 0xbfb2db253e37b0f2, 0x3fa57084cb0de853, 0x3fa032bdb47d8bce, + 0xbfba60a5fcc8d2be); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -35.5312, -61.8125, -37.3125, 23.5938, 44.4688, 38.1250, + // -93.5000, -23.2031, -62.8125, 27.9844, -26.2344, -10.3594, + // -10.7109, -42.0938, 11.0625, 17.8281 + VLOAD_16(v2, 0xd071, 0xd3ba, 0xd0aa, 0x4de6, 0x518f, 0x50c4, 0xd5d8, 0xcdcd, + 0xd3da, 0x4eff, 0xce8f, 0xc92e, 0xc95b, 0xd143, 0x4988, 0x4c75); + // -17.4844 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xcc5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfrdiv.vf v4, v2, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2830, 0.0000, -0.7412, 0.0000, -0.4585, 0.0000, + // 0.7534, 0.0000, -0.6250, 0.0000, 1.6875, 0.0000, 0.4153, + // 0.0000, -0.9810 + VCMP_U16(4, v4, 0x0, 0x3486, 0x0, 0xb9ed, 0x0, 0xb756, 0x0, 0x3a07, 0x0, + 0xb8ff, 0x0, 0x3ec0, 0x0, 0x36a5, 0x0, 0xbbd8); + + VSET(16, e32, m4); + double dscalar_32; + // 981163.06250000, -831670.37500000, -85439.06250000, + // 64225.75781250, -215361.43750000, -292944.75000000, + // 396490.21875000, 954345.93750000, 241910.40625000, + // -62372.83593750, 391838.50000000, 263890.03125000, + // 755217.06250000, -6653.31689453, 526939.25000000, + // 
-759232.75000000 + VLOAD_32(v4, 0x496f8ab1, 0xc94b0b66, 0xc7a6df88, 0x477ae1c2, 0xc852505c, + 0xc88f0a18, 0x48c19947, 0x4968fe9f, 0x486c3d9a, 0xc773a4d6, + 0x48bf53d0, 0x4880da41, 0x49386111, 0xc5cfea89, 0x4900a5b4, + 0xc9395c0c); + // -816463.43750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc94754f7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfrdiv.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.98171520, 0.00000000, -12.71239853, + // 0.00000000, 2.78709030, 0.00000000, -0.85552144, + // 0.00000000, 13.09004879, 0.00000000, -3.09395337, + // 0.00000000, 122.71524811, 0.00000000, 1.07537961 + VCMP_U32(5, v8, 0x0, 0x3f7b51af, 0x0, 0xc14b65fc, 0x0, 0x40325faf, 0x0, + 0xbf5b0374, 0x0, 0x415170d6, 0x0, 0xc0460354, 0x0, 0x42f56e35, 0x0, + 0x3f89a60a); + + VSET(16, e64, m8); + double dscalar_64; + // -1436518.0384849868714809, 7616315.8933699131011963, + // -3920170.8619796745479107, -8788296.3276759665459394, + // -4048340.2138868225738406, 7863298.6869412772357464, + // 6686376.3073008488863707, 7004262.4451152756810188, + // 5533006.3396991230547428, 2002846.6050684414803982, + // -1239975.7277694121003151, 4133787.1656649876385927, + // 2465999.3703419454395771, -4337686.8389181373640895, + // -5741249.6292232554405928, 1762825.0474482532590628 + VLOAD_64(v8, 0xc135eb6609da26f0, 0x415d0dcef92cf900, 0xc14de8956e555998, + 0xc160c3290a7c524f, 0xc14ee2ea1b60a4b6, 0x415dff00abf6d88c, + 0x415981aa13aad12e, 0x415ab8199c7cc4c8, 0x41551b5395bda164, + 0x413e8f9e9ae5c3f0, 0xc132eba7ba4f18a0, 0x414f89cd953482a4, + 0x4142d067af675d68, 0xc1508c05b5b0d5b3, 0xc155e6b06845319e, + 0x413ae6090c259198); + // -181636.6228598635643721 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xc1062c24fb9df3c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vfrdiv.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.0238483573164265, 0.0000000000000000, + // 0.0206680130126992, 0.0000000000000000, + // -0.0230992907800273, 0.0000000000000000, + 
// -0.0259322982659703, 0.0000000000000000, + // -0.0906892332144711, 0.0000000000000000, + // -0.0439395197625382, 0.0000000000000000, + // 0.0418740747326899, 0.0000000000000000, -0.1030372373723578 + VCMP_U64(6, v16, 0x0, 0xbf986bb42af3122b, 0x0, 0x3f9529fedfd9f42e, 0x0, + 0xbf97a75729d81370, 0x0, 0xbf9a8dff13d98f11, 0x0, 0xbfb73768dac16680, + 0x0, 0xbfa67f3da0c39bb5, 0x0, 0x3fa57084cb0de853, 0x0, + 0xbfba60a5fcc8d2be); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... + CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c new file mode 100644 index 000000000..985fbf8b9 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c @@ -0,0 +1,348 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test: unmasked vfredmax.vs on the sequence 1..8 (repeated) at e16, e32 and e64 +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmax.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmax.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x4020000000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + // 
0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x41000000); +} + +// Masked naive test: same data as the naive test, with mask bytes 0xaa, 0x55 loaded into v0 before each reduction +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmax.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x4800); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + // Here v8 is both the destination and the vector source of the reduction + asm volatile("vfredmax.vs v8, v8, v12, v0.t"); + VCMP_U32(6, v8, 0x41000000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmax.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x4020000000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmax.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x4800, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmax.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x41000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 
0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x4020000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x4008000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 
0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x4020000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmax.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 
0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmax.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x4000000000000000, 
0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c new file mode 100644 index 000000000..eb629bd06 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c @@ -0,0 +1,350 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmin.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x3c00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmin.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x3F800000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 
0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x3FF0000000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000, + // + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x3F800000); +} + +// Masked naive test: same data as the naive test, with mask bytes 0xaa, 0x55 loaded into v0 before each reduction +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmin.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x3c00); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + // The scalar operand must be v12, the register loaded just above (not a stale v16) + asm volatile("vfredmin.vs v4, v8, v12, v0.t"); + VCMP_U32(6, v4, 0x3F800000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + 
VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmin.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x3FF0000000000000); +} + +// Are we respecting the undisturbed tail policy? +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmin.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmin.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x3F800000, 0x40000000, 
0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000,
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 
0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmin.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmin.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c new file mode 100644 index 000000000..bd4d61467 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c @@ -0,0 +1,348 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredosum.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x5490); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredosum.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x42920000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x4052400000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + +
// 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x43110000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredosum.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x50A0); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredosum.vs v4, v8, v12, v0.t"); + VCMP_U32(6, v4, 0x42140000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredosum.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredosum.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x5490, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredosum.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 
0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 
0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredosum.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 
0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredosum.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x40000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x4008000000000000, 
0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c new file mode 100644 index 000000000..e019d3787 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c @@ -0,0 +1,352 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredsum.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x5490); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredsum.vs v4, v8, v12"); + VCMP_F32(2, v4, 0x42920000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(3, v8, 0x4052400000000000); + + // Super long vector length + // VSET(64, e32, m8); + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000, + // + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredsum.vs v8, v16, v24"); + // VCMP_F32(4, v8, 0x43908000); + VCMP_F32(4, v8, 0x43110000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredsum.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x50A0); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredsum.vs v4, v8, v12, v0.t"); +
VCMP_F32(6, v4, 0x42140000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredsum.vs v8, v16, v24, v0.t"); + VCMP_F64(7, v8, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredsum.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x5490, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm 
volatile("vfredsum.vs v4, v8, v12"); + VCMP_F32(9, v4, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(10, v8, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 
0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(11, v8, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 
0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(12, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(13, v8, 0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 
0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(14, v8, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 
0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredsum.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredsum.vs v4, v8, v12, v0.t"); + VCMP_F32(16, v4, 0x40000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 
0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24, v0.t"); + VCMP_F64(17, v8, 0x4008000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c new file mode 100644 index 000000000..68d22b5ad --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c @@ -0,0 +1,167 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, -0.5771, + // 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, -0.9521, 0.3818, + // 0.2783, -0.7905 + VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + 0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); + double dscalar_16; + // 0.3062 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34e6); + asm volatile("vfrsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.3335, 1.1572, -0.4111, -0.6489, 1.0898, 0.9570, 0.8833, + // -0.2998, 0.8423, -0.3037, 0.0203, -0.3257, 1.2578, + // -0.0757, 0.0278, 1.0967 + VCMP_U16(1, v2, 0x3556, 0x3ca1, 0xb694, 0xb931, 0x3c5c, 0x3ba8, 0x3b11, + 0xb4cc, 0x3abd, 0xb4dc, 0x2530, 0xb536, 0x3d08, 0xacd8, 0x2720, + 0x3c63); + + VSET(16, e32, m4); + // 0.61218858, 0.50298065, 0.82400811, -0.50508654, + // -0.08447543, -0.66344708, -0.94741052, 0.85856712, + // -0.16725175, -0.36700448, -0.86911696, 0.82600677, + // -0.95377433, 0.06016647, 0.67027277, 0.08167093 + VLOAD_32(v8, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, + 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, + 0x3da74316); + double dscalar_32; + // -0.78482366 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf48ea34); + asm volatile("vfrsub.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1.39701223, -1.28780437, -1.60883176, -0.27973711, + // -0.70034826, -0.12137657, 0.16258687, -1.64339077, + // -0.61757189, -0.41781917, 0.08429331, -1.61083043, + // 0.16895068, -0.84499013, -1.45509648, -0.86649460 + VCMP_U32(2, v4, 0xbfb2d14c, 0xbfa4d6c6, 0xbfcdee33, 0xbe8f39b4, 0xbf334a06, + 0xbdf89448, 0x3e267d2c, 0xbfd25aa1, 0xbf1e1931, 0xbed5ec65, + 0x3daca1f8, 0xbfce2fb1, 0x3e2d0168, 
0xbf585146, 0xbfba409a, + 0xbf5dd297); + + VSET(16, e64, m8); + // -0.0920900511004143, 0.2386858516984947, 0.7068975504949517, + // 0.5997172971219242, 0.7714780386644180, -0.1053493184316212, + // 0.8711121216121871, -0.7388672665065719, 0.0889924652556937, + // 0.3266446452514173, -0.5909707717470494, -0.2733520923877579, + // 0.2365505631181986, 0.9616545156279142, -0.9315790291358075, + // -0.8056559777055108 + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, + 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, + 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, + 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, + 0x3fce4749f238b5c0, 0x3feec5dfb0d5860a, 0xbfedcf7ed2f8e31e, + 0xbfe9c7ef0b824e6e); + double dscalar_64; + // -0.4500891854782252 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); + asm volatile("vfrsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.3579991343778108, -0.6887750371767198, -1.1569867359731769, + // -1.0498064826001494, -1.2215672241426432, -0.3447398670466040, + // -1.3212013070904123, 0.2887780810283467, -0.5390816507339189, + // -0.7767338307296425, 0.1408815862688242, -0.1767370930904673, + // -0.6866397485964237, -1.4117437011061393, 0.4814898436575823, + // 0.3555667922272856 + VCMP_U64(3, v8, 0xbfd6e97533898954, 0xbfe60a71f25f34ce, 0xbff28304860e91be, + 0xbff0cc01e1de57cc, 0xbff38b8a12d8ee33, 0xbfd61037cda5ec60, + 0xbff523a3fb562c21, 0x3fd27b570f746074, 0xbfe140282978d3fa, + 0xbfe8db00e815c798, 0x3fc2086866d10cc8, 0xbfc69f5231682628, + 0xbfe5f8f3ec0b5ea2, 0xbff6968090295b9e, 0x3fded0bac6f763d8, + 0x3fd6c19b380a3a78); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, + // -0.5771, 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, + // -0.9521, 0.3818, 0.2783, -0.7905 + VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + 
0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); + double dscalar_16; + // 0.3062 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34e6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfrsub.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 1.1572, 0.0000, -0.6489, 0.0000, 0.9570, 0.0000, + // -0.2998, 0.0000, -0.3037, 0.0000, -0.3257, 0.0000, + // -0.0757, 0.0000, 1.0967 + VCMP_U16(4, v2, 0x0, 0x3ca1, 0x0, 0xb931, 0x0, 0x3ba8, 0x0, 0xb4cc, 0x0, + 0xb4dc, 0x0, 0xb536, 0x0, 0xacd8, 0x0, 0x3c63); + + VSET(16, e32, m4); + // 0.61218858, 0.50298065, 0.82400811, -0.50508654, + // -0.08447543, -0.66344708, -0.94741052, 0.85856712, + // -0.16725175, -0.36700448, -0.86911696, 0.82600677, + // -0.95377433, 0.06016647, 0.67027277, 0.08167093 + VLOAD_32(v8, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, + 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, + 0x3da74316); + double dscalar_32; + // -0.78482366 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf48ea34); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfrsub.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -1.28780437, 0.00000000, -0.27973711, + // 0.00000000, -0.12137657, 0.00000000, -1.64339077, + // 0.00000000, -0.41781917, 0.00000000, -1.61083043, + // 0.00000000, -0.84499013, 0.00000000, -0.86649460 + VCMP_U32(5, v4, 0x0, 0xbfa4d6c6, 0x0, 0xbe8f39b4, 0x0, 0xbdf89448, 0x0, + 0xbfd25aa1, 0x0, 0xbed5ec65, 0x0, 0xbfce2fb1, 0x0, 0xbf585146, 0x0, + 0xbf5dd297); + + VSET(16, e64, m8); + // -0.0920900511004143, 0.2386858516984947, + // 0.7068975504949517, 0.5997172971219242, 0.7714780386644180, + // -0.1053493184316212, 0.8711121216121871, + // -0.7388672665065719, 0.0889924652556937, + // 0.3266446452514173, -0.5909707717470494, + // -0.2733520923877579, 0.2365505631181986, + // 0.9616545156279142, -0.9315790291358075, -0.8056559777055108 + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 
0x3fe69ee79c9ff24a, + 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, + 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, + 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, + 0x3fce4749f238b5c0, 0x3feec5dfb0d5860a, 0xbfedcf7ed2f8e31e, + 0xbfe9c7ef0b824e6e); + double dscalar_64; + // -0.4500891854782252 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfrsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.6887750371767198, 0.0000000000000000, + // -1.0498064826001494, 0.0000000000000000, + // -0.3447398670466040, 0.0000000000000000, + // 0.2887780810283467, 0.0000000000000000, + // -0.7767338307296425, 0.0000000000000000, + // -0.1767370930904673, 0.0000000000000000, + // -1.4117437011061393, 0.0000000000000000, 0.3555667922272856 + VCMP_U64(6, v8, 0x0, 0xbfe60a71f25f34ce, 0x0, 0xbff0cc01e1de57cc, 0x0, + 0xbfd61037cda5ec60, 0x0, 0x3fd27b570f746074, 0x0, 0xbfe8db00e815c798, + 0x0, 0xbfc69f5231682628, 0x0, 0xbff6968090295b9e, 0x0, + 0x3fd6c19b380a3a78); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c new file mode 100644 index 000000000..f69ea8d24 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c @@ -0,0 +1,408 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, + // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, + // -0.2959, 0.9668 + VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, + // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, + // 0.6084, 0.1591 + VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + asm volatile("vfsgnj.vv v2, v4, v6"); + // -0.3784, -0.9043, -0.4600, -0.6748, -0.4448, 0.8804, -0.1497, + // 0.7285, 0.9927, -0.9922, 0.8965, -0.8672, 0.1860, -0.9336, + // 0.2959, 0.9668 + VCMP_U16(1, v2, 0xb60e, 0xbb3c, 0xb75c, 0xb966, 0xb71e, 0x3b0b, 0xb0ca, + 0x39d4, 0x3bf1, 0xbbf0, 0x3b2c, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, + 0x3bbc); + + VSET(16, e32, m4); + // 0.30226409, 0.06318295, -0.82590002, -0.17829193, + // 0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, -0.55081791, 0.09124859, -0.13254598, + // 0.95786512, 0.95395225, 0.19890578, 0.76956910 + VLOAD_32(v8, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + // 0.06560040, 0.31805936, 0.14663234, -0.85004497, + // -0.49171701, 0.32139263, -0.09995110, -0.34368968, + // 0.33917251, 0.07372360, 0.70147520, 0.82915747, + // -0.14581841, -0.19974701, -0.58837658, 0.95794803 + VLOAD_32(v12, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 
0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + asm volatile("vfsgnj.vv v4, v8, v12"); + // 0.30226409, 0.06318295, 0.82590002, -0.17829193, + // -0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, 0.55081791, 0.09124859, 0.13254598, + // -0.95786512, -0.95395225, -0.19890578, 0.76956910 + VCMP_U32(2, v4, 0x3e9ac25c, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, + 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, + 0x3f45027b); + + VSET(16, e64, m8); + // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, + // 0.4707187701595239, 0.1954104859873083, 0.0486819373954939, + // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, + // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, + // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, + // 0.2439726910863504 + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + // 0.0713540199640168, 0.3499800646587572, -0.5478360240866667, + // -0.7324007835973676, 0.5646664961108800, + // -0.7430380608733607, -0.5676032662558192, + // -0.7382565525776155, -0.7933198466305424, + // -0.0650991402083496, -0.1766522935757786, + // -0.4663829943595241, -0.1565231028144627, + // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + asm 
volatile("vfsgnj.vv v8, v16, v24"); + // 0.1900636538602862, 0.9484843154859770, -0.5869658512198073, + // -0.4707187701595239, 0.1954104859873083, + // -0.0486819373954939, -0.1899986048192088, + // -0.1837438621239862, -0.2694105234528963, + // -0.7960262036276018, -0.6381040017115214, + // -0.2199215324293253, -0.4219965521278597, + // -0.6541697303087526, -0.7254411745966671, -0.2439726910863504 + VCMP_U64(3, v8, 0x3fc854017cbe7d20, 0x3fee59fbc778ffbc, 0xbfe2c86c9bdb73b4, + 0xbfde20419edcb428, 0x3fc90335f74e33c8, 0xbfa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, + 0xbfdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0xbfe736d06902107a, + 0xbfcf3a7f44aa9f48); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, + // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, + // -0.2959, 0.9668 + VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, + // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, + // 0.6084, 0.1591 + VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnj.vv v2, v4, v6, v0.t"); + // 0.0000, -0.9043, 0.0000, -0.6748, 0.0000, 0.8804, 0.0000, + // 0.7285, 0.0000, -0.9922, 0.0000, -0.8672, 0.0000, -0.9336, + // 0.0000, 0.9668 + VCMP_U16(4, v2, 0x0, 0xbb3c, 0x0, 0xb966, 0x0, 0x3b0b, 0x0, 0x39d4, 0x0, + 0xbbf0, 0x0, 0xbaf0, 0x0, 0xbb78, 0x0, 0x3bbc); + + VSET(16, e32, m4); + // 0.30226409, 0.06318295, -0.82590002, -0.17829193, + // 0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, 
-0.55081791, 0.09124859, -0.13254598, + // 0.95786512, 0.95395225, 0.19890578, 0.76956910 + VLOAD_32(v8, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + // 0.06560040, 0.31805936, 0.14663234, -0.85004497, + // -0.49171701, 0.32139263, -0.09995110, -0.34368968, + // 0.33917251, 0.07372360, 0.70147520, 0.82915747, + // -0.14581841, -0.19974701, -0.58837658, 0.95794803 + VLOAD_32(v12, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnj.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.06318295, 0.00000000, -0.17829193, + // 0.00000000, 0.85831785, 0.00000000, -0.32250872, + // 0.00000000, 0.55081791, 0.00000000, 0.13254598, + // 0.00000000, -0.95395225, 0.00000000, 0.76956910 + VCMP_U32(5, v4, 0x0, 0x3d816610, 0x0, 0xbe369229, 0x0, 0x3f5bbab8, 0x0, + 0xbea51fdd, 0x0, 0x3f0d0267, 0x0, 0x3e07ba22, 0x0, 0xbf743637, 0x0, + 0x3f45027b); + + VSET(16, e64, m8); + // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, + // 0.4707187701595239, 0.1954104859873083, 0.0486819373954939, + // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, + // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, + // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, + // 0.2439726910863504 + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + // 0.0713540199640168, 0.3499800646587572, -0.5478360240866667, + // 
-0.7324007835973676, 0.5646664961108800, + // -0.7430380608733607, -0.5676032662558192, + // -0.7382565525776155, -0.7933198466305424, + // -0.0650991402083496, -0.1766522935757786, + // -0.4663829943595241, -0.1565231028144627, + // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnj.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.9484843154859770, 0.0000000000000000, + // -0.4707187701595239, 0.0000000000000000, + // -0.0486819373954939, 0.0000000000000000, + // -0.1837438621239862, 0.0000000000000000, + // -0.7960262036276018, 0.0000000000000000, + // -0.2199215324293253, 0.0000000000000000, + // -0.6541697303087526, 0.0000000000000000, -0.2439726910863504 + VCMP_U64(6, v8, 0x0, 0x3fee59fbc778ffbc, 0x0, 0xbfde20419edcb428, 0x0, + 0xbfa8ecd6c20a0480, 0x0, 0xbfc784eb3b54e580, 0x0, 0xbfe9790bf1eadde4, + 0x0, 0xbfcc266386bc2e10, 0x0, 0xbfe4eef55bb6b208, 0x0, + 0xbfcf3a7f44aa9f48); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.9023 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3b38); + // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, + // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, -0.7485, + // -0.3499, -0.2178 + VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); + asm volatile("vfsgnj.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.5586, 0.0221, 0.7397, 0.9844, 0.1426, 0.6958, 0.0319, + // 0.3943, 0.5425, 0.9814, 0.7852, 0.7271, 0.1810, 0.7485, + // 0.3499, 
0.2178 + VCMP_U16(7, v2, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0x3090, 0x3991, 0x2816, + 0x364f, 0x3857, 0x3bda, 0x3a48, 0x39d1, 0x31cb, 0x39fd, 0x3599, + 0x32f8); + + VSET(16, e32, m4); + double dscalar_32; + // 0.64529878 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f25324d); + // 0.27794743, 0.64720273, 0.88201439, -0.27750894, + // -0.02381280, -0.27677080, -0.58998328, 0.15329099, + // 0.52908343, -0.63265759, 0.48432603, 0.70191479, + // -0.55785930, 0.34719029, -0.06872076, -0.69960916 + VLOAD_32(v8, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, + 0xbf331996); + asm volatile("vfsgnj.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.27794743, 0.64720273, 0.88201439, 0.27750894, + // 0.02381280, 0.27677080, 0.58998328, 0.15329099, + // 0.52908343, 0.63265759, 0.48432603, 0.70191479, + // 0.55785930, 0.34719029, 0.06872076, 0.69960916 + VCMP_U32(8, v4, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0x3e8e15a7, 0x3cc31310, + 0x3e8db4e7, 0x3f170925, 0x3e1cf850, 0x3f077203, 0x3f21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0x3f0ecfde, 0x3eb1c2ed, 0x3d8cbd78, + 0x3f331996); + + VSET(16, e64, m8); + double dscalar_64; + // 0.4863995754678485 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); + // 0.4577518787562838, -0.0989909265811582, + // -0.5406373582107198, -0.6896639688670565, + // 0.9053190721589099, -0.7617679756965072, 0.4649312111760273, + // 0.6917063611214438, 0.2205644023843889, 0.1217272698758698, + // -0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, + // -0.5339346851091937, 0.4946553559543683 + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0xbfe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, + 
0x3fdfa86ef0276044); + asm volatile("vfsgnj.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.4577518787562838, 0.0989909265811582, 0.5406373582107198, + // 0.6896639688670565, 0.9053190721589099, 0.7617679756965072, + // 0.4649312111760273, 0.6917063611214438, 0.2205644023843889, + // 0.1217272698758698, 0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, 0.5339346851091937, + // 0.4946553559543683 + VCMP_U64(9, v8, 0x3fdd4bce893c3600, 0x3fb9577828444dc0, 0x3fe14ce6b790591e, + 0x3fe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0x3fe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0x3fd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0x3fe115fe3157cf38, + 0x3fdfa86ef0276044); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.9023 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3b38); + // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, + // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, + // -0.7485, -0.3499, -0.2178 + VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnj.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.0221, 0.0000, 0.9844, 0.0000, 0.6958, 0.0000, + // 0.3943, 0.0000, 0.9814, 0.0000, 0.7271, 0.0000, 0.7485, + // 0.0000, 0.2178 + VCMP_U16(10, v2, 0x0, 0x25a7, 0x0, 0x3be0, 0x0, 0x3991, 0x0, 0x364f, 0x0, + 0x3bda, 0x0, 0x39d1, 0x0, 0x39fd, 0x0, 0x32f8); + + VSET(16, e32, m4); + double dscalar_32; + // 0.64529878 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f25324d); + // 0.27794743, 0.64720273, 0.88201439, -0.27750894, + // -0.02381280, -0.27677080, -0.58998328, 0.15329099, + // 0.52908343, -0.63265759, 0.48432603, 0.70191479, + // -0.55785930, 0.34719029, -0.06872076, -0.69960916 + VLOAD_32(v8, 
0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, + 0xbf331996); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnj.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.64720273, 0.00000000, 0.27750894, + // 0.00000000, 0.27677080, 0.00000000, 0.15329099, + // 0.00000000, 0.63265759, 0.00000000, 0.70191479, + // 0.00000000, 0.34719029, 0.00000000, 0.69960916 + VCMP_U32(11, v4, 0x0, 0x3f25af14, 0x0, 0x3e8e15a7, 0x0, 0x3e8db4e7, 0x0, + 0x3e1cf850, 0x0, 0x3f21f5d9, 0x0, 0x3f33b0b0, 0x0, 0x3eb1c2ed, 0x0, + 0x3f331996); + + VSET(16, e64, m8); + double dscalar_64; + // 0.4863995754678485 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); + // 0.4577518787562838, -0.0989909265811582, + // -0.5406373582107198, -0.6896639688670565, + // 0.9053190721589099, -0.7617679756965072, + // 0.4649312111760273, 0.6917063611214438, + // 0.2205644023843889, 0.1217272698758698, + // -0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, + // -0.5339346851091937, 0.4946553559543683 + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0xbfe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, + 0x3fdfa86ef0276044); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnj.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.0989909265811582, 0.0000000000000000, + // 0.6896639688670565, 0.0000000000000000, 0.7617679756965072, + // 0.0000000000000000, 0.6917063611214438, 0.0000000000000000, + // 0.1217272698758698, 0.0000000000000000, 0.1693366988903542, + // 0.0000000000000000, 0.7157757577569959, 0.0000000000000000, + // 0.4946553559543683 + VCMP_U64(12, v8, 
0x0, 0x3fb9577828444dc0, 0x0, 0x3fe611ba2bf06f2a, 0x0, + 0x3fe860673bd8363e, 0x0, 0x3fe6227560ee74e0, 0x0, 0x3fbf2984b325f230, + 0x0, 0x3fc5acd32fdf92e8, 0x0, 0x3fe6e7a28fdabfd2, 0x0, + 0x3fdfa86ef0276044); +}; + +// The sign injection should work with NaNs and special values, and should not +// raise any exceptions +void TEST_CASE5(void) { + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, 0x0000, 0x3b3c, 0xb75c, 0x7fff, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x0000, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + VLOAD_16(v6, 0x8000, 0xffff, 0xffff, 0xb9e8, 0xb9bc, 0x7fff, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x7fff, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + asm volatile("vfsgnj.vv v2, v4, v6"); + VCMP_U16(13, v2, 0x8000, 0xbb3c, 0xb75c, 0xffff, 0xb71e, 0x3b0b, 0xb0ca, + 0x39d4, 0x3bf1, 0xbbf0, 0x0000, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, + 0x3bbc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x00000000, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + VLOAD_32(v12, 0x80000000, 0x7fffffff, 0x3e1626ca, 0xffffffff, 0xbefbc255, + 0x7fffffff, 0xffffffff, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + asm volatile("vfsgnj.vv v4, v8, v12"); + VCMP_U32(14, v4, 0x80000000, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, + 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, + 0x3f45027b); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000000, 0xbfee59fbc778ffbc, 0x7fffffffffffffff, + 0x3fde20419edcb428, 0x8000000000000000, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + VLOAD_64(v24, 0x8000000000000000, 
0x7fffffffffffffff, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x0000000000000001, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + asm volatile("vfsgnj.vv v8, v16, v24"); + VCMP_U64(15, v8, 0x8000000000000000, 0x3fee59fbc778ffbc, 0xffffffffffffffff, + 0xbfde20419edcb428, 0x0000000000000000, 0xbfa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, + 0xbfdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0xbfe736d06902107a, + 0xbfcf3a7f44aa9f48); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c new file mode 100644 index 000000000..f07d8e90d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c @@ -0,0 +1,350 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, + // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, + // 0.1127, -0.1804 + VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 0xb1c6); + // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, + // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, + // 0.0190, -0.6289 + VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); + asm volatile("vfsgnjn.vv v2, v4, v6"); + // 0.5278, 0.6548, 0.2776, -0.8730, 0.2180, 0.6172, 0.8408, + // 0.9922, 0.4250, -0.7393, -0.2549, -0.4998, -0.4609, 0.6348, + // -0.1127, 0.1804 + VCMP_U16(1, v2, 0x3839, 0x393d, 0x3471, 0xbafc, 0x32fa, 0x38f0, 0x3aba, + 0x3bf0, 0x36cd, 0xb9ea, 0xb414, 0xb7ff, 0xb760, 0x3914, 0xaf36, + 0x31c6); + + VSET(16, e32, m4); + // -0.64186704, 0.87601262, -0.93132722, 0.53574133, + // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, + // 0.96179944, 0.80393785, 0.06180594, 0.86447370, + // -0.24008171, -0.42264909, -0.01868468 + VLOAD_32(v8, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, + 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0xbc99109c); + // 0.32984266, -0.78281105, 0.73037797, 0.99060333, + // 0.44768164, 0.66998041, 0.39474848, -0.39895460, + // -0.06065369, 0.53388232, -0.60164928, -0.09839682, + // -0.38704434, 0.47123700, 0.40912241, -0.54495376 + VLOAD_32(v12, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, + 0xbf1a05b0, 
0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, + 0xbf0b8217); + asm volatile("vfsgnjn.vv v4, v8, v12"); + // -0.64186704, 0.87601262, -0.93132722, -0.53574133, + // -0.17954259, -0.80486834, -0.95272040, 0.45182621, + // 0.20335940, -0.96179944, 0.80393785, 0.06180594, 0.86447370, + // -0.24008171, -0.42264909, 0.01868468 + VCMP_U32(2, v4, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0xbf092658, 0xbe37da03, + 0xbf4e0bda, 0xbf73e57c, 0x3ee755c4, 0x3e503d72, 0xbf76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0x3c99109c); + + VSET(16, e64, m8); + // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, + // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, + // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, + // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, + // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, + // -0.3109452697316515 + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, + 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0x3fecd043a2c52a30, 0xbfe29bbde1ce1372, 0xbfdcaea8c75a67f8, + 0xbfd3e686fd15f950); + // -0.8601583185162320, -0.2023208019417544, 0.7046992650654684, + // 0.0669209072111863, -0.9495814052980500, 0.4501419112888980, + // 0.1528430256162707, -0.2750771515266404, -0.5539880061109905, + // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, + // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, + // 0.9025785865542095 + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, + 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, + 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, + 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, + 0x3fece1ec7cea3f5e); + asm volatile("vfsgnjn.vv 
v8, v16, v24"); + // 0.3054868811191440, 0.2848737407493320, -0.8796894022735833, + // -0.2053728688878902, 0.3336030943630310, + // -0.2807217618714037, -0.4723331455917303, 0.8582398814993568, + // 0.8015611350975347, 0.0545934239457773, -0.8461592442963186, + // 0.5731810427237676, -0.9004228762726765, 0.5815114412549589, + // 0.4481603571708770, -0.3109452697316515 + VCMP_U64(3, v8, 0x3fd38d18d8f0e180, 0x3fd23b5f12007bec, 0xbfec266a63ace3f8, + 0xbfca49a87dadf9c0, 0x3fd559c0cb088d3c, 0xbfd1f75868a0d7ec, + 0xbfde3ab4cd4887cc, 0x3feb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0xbfeb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0xbfecd043a2c52a30, 0x3fe29bbde1ce1372, 0x3fdcaea8c75a67f8, + 0xbfd3e686fd15f950); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, + // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, + // 0.1127, -0.1804 + VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 0xb1c6); + // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, + // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, + // 0.0190, -0.6289 + VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjn.vv v2, v4, v6, v0.t"); + // 0.0000, 0.6548, 0.0000, -0.8730, 0.0000, 0.6172, 0.0000, + // 0.9922, 0.0000, -0.7393, 0.0000, -0.4998, 0.0000, 0.6348, + // 0.0000, 0.1804 + VCMP_U16(4, v2, 0x0, 0x393d, 0x0, 0xbafc, 0x0, 0x38f0, 0x0, 0x3bf0, 0x0, + 0xb9ea, 0x0, 0xb7ff, 0x0, 0x3914, 0x0, 0x31c6); + + VSET(16, e32, m4); + // -0.64186704, 0.87601262, -0.93132722, 0.53574133, + // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, + // 0.96179944, 0.80393785, 
0.06180594, 0.86447370, + // -0.24008171, -0.42264909, -0.01868468 + VLOAD_32(v8, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, + 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0xbc99109c); + // 0.32984266, -0.78281105, 0.73037797, 0.99060333, + // 0.44768164, 0.66998041, 0.39474848, -0.39895460, + // -0.06065369, 0.53388232, -0.60164928, -0.09839682, + // -0.38704434, 0.47123700, 0.40912241, -0.54495376 + VLOAD_32(v12, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, + 0xbf1a05b0, 0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, + 0xbf0b8217); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjn.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.87601262, 0.00000000, -0.53574133, + // 0.00000000, -0.80486834, 0.00000000, 0.45182621, + // 0.00000000, -0.96179944, 0.00000000, 0.06180594, + // 0.00000000, -0.24008171, 0.00000000, 0.01868468 + VCMP_U32(5, v4, 0x0, 0x3f60425d, 0x0, 0xbf092658, 0x0, 0xbf4e0bda, 0x0, + 0x3ee755c4, 0x0, 0xbf76387d, 0x0, 0x3d7d283b, 0x0, 0xbe75d7fb, 0x0, + 0x3c99109c); + + VSET(16, e64, m8); + // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, + // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, + // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, + // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, + // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, + // -0.3109452697316515 + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, + 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0x3fecd043a2c52a30, 0xbfe29bbde1ce1372, 0xbfdcaea8c75a67f8, + 0xbfd3e686fd15f950); + // -0.8601583185162320, -0.2023208019417544, 0.7046992650654684, + // 0.0669209072111863, 
-0.9495814052980500, 0.4501419112888980, + // 0.1528430256162707, -0.2750771515266404, -0.5539880061109905, + // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, + // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, + // 0.9025785865542095 + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, + 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, + 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, + 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, + 0x3fece1ec7cea3f5e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjn.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.2848737407493320, 0.0000000000000000, + // -0.2053728688878902, 0.0000000000000000, + // -0.2807217618714037, 0.0000000000000000, 0.8582398814993568, + // 0.0000000000000000, 0.0545934239457773, 0.0000000000000000, + // 0.5731810427237676, 0.0000000000000000, 0.5815114412549589, + // 0.0000000000000000, -0.3109452697316515 + VCMP_U64(6, v8, 0x0, 0x3fd23b5f12007bec, 0x0, 0xbfca49a87dadf9c0, 0x0, + 0xbfd1f75868a0d7ec, 0x0, 0x3feb76b37be53474, 0x0, 0x3fabf3ab54d8f940, + 0x0, 0x3fe2577fc525f1c0, 0x0, 0x3fe29bbde1ce1372, 0x0, + 0xbfd3e686fd15f950); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.6143 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb8ea); + // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, -0.1511, + // -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, -0.0123, + // -0.9995, -0.3872 + VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); + asm volatile("vfsgnjn.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.9351, 0.6538, 0.6743, 0.4695, 0.1439, 0.6250, 0.1511, + // 0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, 0.0123, + // 0.9995, 0.3872 + VCMP_U16(7, v2, 0x3b7b, 
0x393b, 0x3965, 0x3783, 0x309b, 0x3900, 0x30d6, + 0x39fb, 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0x224d, 0x3bff, + 0x3632); + + VSET(16, e32, m4); + double dscalar_32; + // 0.56259364 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f100623); + // -0.00813205, -0.38676089, 0.94379848, 0.39548567, + // 0.90217608, 0.57424510, 0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, 0.04399948, -0.57520008, -0.05702910 + VLOAD_32(v8, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, + 0xbd699758); + asm volatile("vfsgnjn.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.00813205, -0.38676089, -0.94379848, -0.39548567, + // -0.90217608, -0.57424510, -0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, -0.04399948, -0.57520008, -0.05702910 + VCMP_U32(8, v4, 0xbc053c4a, 0xbec60586, 0xbf719cc7, 0xbeca7d19, 0xbf66f503, + 0xbf1301ba, 0xbd759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0xbd3438cd, 0xbf134050, + 0xbd699758); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1909501680714165 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); + // -0.2692390874696449, -0.3268380231167121, 0.8386824891028197, + // -0.5650452268361481, -0.6389787807266418, 0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, + // -0.0001555883762874, 0.5283267089670276, 0.5439688283816015, + // -0.2866314604291811, -0.0576946087921848, 0.7960283598249005, + // -0.8999056473475127, 0.2142070697411482 + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, + 
0x3fcb6b23238e1bc8); + asm volatile("vfsgnjn.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.2692390874696449, -0.3268380231167121, -0.8386824891028197, + // -0.5650452268361481, -0.6389787807266418, -0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, -0.0001555883762874, + // -0.5283267089670276, -0.5439688283816015, -0.2866314604291811, + // -0.0576946087921848, -0.7960283598249005, -0.8999056473475127, + // -0.2142070697411482 + VCMP_U64(9, v8, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0xbfead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0xbfe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0xbfe0e80d6a13bbf4, 0xbfe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0xbfe9791077845df2, 0xbfeccc06ed9afc1e, + 0xbfcb6b23238e1bc8); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.6143 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb8ea); + // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, + // -0.1511, -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, + // -0.0123, -0.9995, -0.3872 + VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjn.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.6538, 0.0000, 0.4695, 0.0000, 0.6250, 0.0000, + // 0.7476, 0.0000, 0.6279, 0.0000, 0.2610, 0.0000, 0.0123, + // 0.0000, 0.3872 + VCMP_U16(10, v2, 0x0, 0x393b, 0x0, 0x3783, 0x0, 0x3900, 0x0, 0x39fb, 0x0, + 0x3906, 0x0, 0x342d, 0x0, 0x224d, 0x0, 0x3632); + + VSET(16, e32, m4); + double dscalar_32; + // 0.56259364 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f100623); + // -0.00813205, -0.38676089, 0.94379848, 0.39548567, + // 0.90217608, 0.57424510, 0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, 0.04399948, -0.57520008, 
-0.05702910 + VLOAD_32(v8, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, + 0xbd699758); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjn.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.38676089, 0.00000000, -0.39548567, + // 0.00000000, -0.57424510, 0.00000000, -0.00974263, + // 0.00000000, -0.36967716, 0.00000000, -0.93745488, + // 0.00000000, -0.04399948, 0.00000000, -0.05702910 + VCMP_U32(11, v4, 0x0, 0xbec60586, 0x0, 0xbeca7d19, 0x0, 0xbf1301ba, 0x0, + 0xbc1f9f8d, 0x0, 0xbebd4653, 0x0, 0xbf6ffd0b, 0x0, 0xbd3438cd, 0x0, + 0xbd699758); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1909501680714165 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); + // -0.2692390874696449, -0.3268380231167121, + // 0.8386824891028197, -0.5650452268361481, + // -0.6389787807266418, 0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, + // -0.0001555883762874, 0.5283267089670276, + // 0.5439688283816015, -0.2866314604291811, + // -0.0576946087921848, 0.7960283598249005, + // -0.8999056473475127, 0.2142070697411482 + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, + 0x3fcb6b23238e1bc8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjn.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.3268380231167121, 0.0000000000000000, + // -0.5650452268361481, 0.0000000000000000, + // -0.5318945600667211, 0.0000000000000000, + // -0.5679136293897145, 0.0000000000000000, + // -0.5283267089670276, 0.0000000000000000, + // -0.2866314604291811, 0.0000000000000000, + // -0.7960283598249005, 
0.0000000000000000, + // -0.2142070697411482 + VCMP_U64(12, v8, 0x0, 0xbfd4eaea07180958, 0x0, 0xbfe214d9ba40b584, 0x0, + 0xbfe10547bd8d051e, 0x0, 0xbfe22c593425cec0, 0x0, 0xbfe0e80d6a13bbf4, + 0x0, 0xbfd2582b7b231344, 0x0, 0xbfe9791077845df2, 0x0, + 0xbfcb6b23238e1bc8); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c new file mode 100644 index 000000000..2d6ea2cdf --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c @@ -0,0 +1,348 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 0.4626, 0.5449, + // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, + // 0.8438, -0.2742 + VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); + // 0.3516, 0.7925, -0.5034, -0.1672, 0.0703, -0.9731, 0.0361, + // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, + // -0.9917, 0.5415 + VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); + asm volatile("vfsgnjx.vv v2, v4, v6"); + // -0.0371, 0.8374, -0.7183, -0.8086, -0.8940, -0.4626, 0.5449, + // -0.6831, 0.4661, 0.2981, -0.5615, -0.6167, 0.7075, -0.7603, + // -0.8438, -0.2742 + VCMP_U16(1, v2, 0xa8c1, 0x3ab3, 0xb9bf, 0xba78, 0xbb27, 0xb767, 0x385c, + 0xb977, 0x3775, 0x34c5, 0xb87e, 0xb8ef, 0x39a9, 0xba15, 0xbac0, + 0xb463); + + VSET(16, e32, m4); + // 
-0.00918692, -0.23372029, 0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, 0.38167605, + // -0.52784044, 0.46330592, 0.66792834, 0.94584799, + // -0.11679628, 0.12139154, 0.61421394, -0.71422517 + VLOAD_32(v8, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, + 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, + 0xbf36d776); + // -0.96525091, -0.82903022, -0.98528612, 0.36915505, + // 0.23285799, 0.19133335, 0.78484982, -0.40654737, + // -0.40144378, -0.94419461, 0.60990387, -0.37662670, + // 0.75369638, -0.82297397, 0.24545205, -0.75572032 + VLOAD_32(v12, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, + 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, + 0xbf4176e3); + asm volatile("vfsgnjx.vv v4, v8, v12"); + // 0.00918692, 0.23372029, -0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, -0.38167605, + // 0.52784044, -0.46330592, 0.66792834, -0.94584799, + // -0.11679628, -0.12139154, 0.61421394, 0.71422517 + VCMP_U32(2, v4, 0x3c1684ba, 0x3e6f545f, 0xbedbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0xbec36b0b, 0x3f07208d, 0xbeed366f, + 0x3f2afd5a, 0xbf722318, 0xbdef32e4, 0xbdf89c21, 0x3f1d3d20, + 0x3f36d776); + + VSET(16, e64, m8); + // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, + // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, + // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, + // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, 0.1418123916338592, + // -0.2697778839142546 + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, + 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 
0x3fd4072312f3290c, 0x3fc226e8901e1378, + 0xbfd1440a752621b8); + // -0.9042358342806300, 0.2953863994960662, -0.4373909703642964, + // 0.1464626280814265, -0.5161207396769107, -0.3525096032632213, + // -0.0692332757289065, -0.9900711773455610, 0.6225050177521096, + // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, + // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, + // 0.5095574851608440 + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, + 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, + 0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, + 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, + 0x3fe04e4b7fc654a0); + asm volatile("vfsgnjx.vv v8, v16, v24"); + // 0.4085246287477386, 0.8681744372264055, 0.9782992825101422, + // 0.9959576051606904, 0.7910104167136705, -0.0799315061445605, + // -0.2562329212571202, 0.0401280831920132, + // -0.6164331117742006, -0.0314794700215042, + // -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, + // -0.1418123916338592, -0.2697778839142546 + VCMP_U64(3, v8, 0x3fda25447c0540c8, 0x3febc815c1e38a2c, 0x3fef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0x3fe94ff513d293d6, 0xbfb4766424cf97d0, + 0xbfd0661ec43d4dd0, 0x3fa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0xbfa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 0x3fd4072312f3290c, 0xbfc226e8901e1378, + 0xbfd1440a752621b8); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 0.4626, 0.5449, + // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, + // 0.8438, -0.2742 + VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); + // 0.3516, 0.7925, -0.5034, 
-0.1672, 0.0703, -0.9731, 0.0361, + // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, + // -0.9917, 0.5415 + VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjx.vv v2, v4, v6, v0.t"); + // 0.0000, 0.8374, 0.0000, -0.8086, 0.0000, -0.4626, 0.0000, + // -0.6831, 0.0000, 0.2981, 0.0000, -0.6167, 0.0000, + // -0.7603, 0.0000, -0.2742 + VCMP_U16(4, v2, 0x0, 0x3ab3, 0x0, 0xba78, 0x0, 0xb767, 0x0, 0xb977, 0x0, + 0x34c5, 0x0, 0xb8ef, 0x0, 0xba15, 0x0, 0xb463); + + VSET(16, e32, m4); + // -0.00918692, -0.23372029, 0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, 0.38167605, + // -0.52784044, 0.46330592, 0.66792834, 0.94584799, + // -0.11679628, 0.12139154, 0.61421394, -0.71422517 + VLOAD_32(v8, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, + 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, + 0xbf36d776); + // -0.96525091, -0.82903022, -0.98528612, 0.36915505, + // 0.23285799, 0.19133335, 0.78484982, -0.40654737, + // -0.40144378, -0.94419461, 0.60990387, -0.37662670, + // 0.75369638, -0.82297397, 0.24545205, -0.75572032 + VLOAD_32(v12, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, + 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, + 0xbf4176e3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjx.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.23372029, 0.00000000, 0.95128548, + // 0.00000000, 0.08194520, 0.00000000, -0.38167605, + // 0.00000000, -0.46330592, 0.00000000, -0.94584799, + // 0.00000000, -0.12139154, 0.00000000, 0.71422517 + VCMP_U32(5, v4, 0x0, 0x3e6f545f, 0x0, 0x3f738772, 0x0, 0x3da7d2e2, 0x0, + 0xbec36b0b, 0x0, 0xbeed366f, 0x0, 0xbf722318, 0x0, 0xbdf89c21, 0x0, + 0x3f36d776); + + VSET(16, e64, m8); 
+ // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, + // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, + // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, + // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, 0.1418123916338592, + // -0.2697778839142546 + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, + 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 0x3fd4072312f3290c, 0x3fc226e8901e1378, + 0xbfd1440a752621b8); + // -0.9042358342806300, 0.2953863994960662, -0.4373909703642964, + // 0.1464626280814265, -0.5161207396769107, -0.3525096032632213, + // -0.0692332757289065, -0.9900711773455610, 0.6225050177521096, + // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, + // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, + // 0.5095574851608440 + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, + 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, + 0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, + 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, + 0x3fe04e4b7fc654a0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjx.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.8681744372264055, 0.0000000000000000, + // 0.9959576051606904, 0.0000000000000000, -0.0799315061445605, + // 0.0000000000000000, 0.0401280831920132, 0.0000000000000000, + // -0.0314794700215042, 0.0000000000000000, 0.2944948324466776, + // 0.0000000000000000, 0.3129356083924371, 0.0000000000000000, + // -0.2697778839142546 + VCMP_U64(6, v8, 0x0, 0x3febc815c1e38a2c, 0x0, 0x3fefdee27bcbc3c2, 0x0, + 0xbfb4766424cf97d0, 0x0, 0x3fa48bab09ebf660, 0x0, 0xbfa01e13bc79bd60, + 
0x0, 0x3fd2d900da8cc448, 0x0, 0x3fd4072312f3290c, 0x0, + 0xbfd1440a752621b8); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9766 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbbd0); + // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, -0.3149, + // -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, -0.6157, 0.5723, + // 0.8438, -0.1544 + VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, + 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); + asm volatile("vfsgnjx.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.8535, 0.3750, 0.8066, 0.9097, 0.2216, 0.5645, 0.3149, + // 0.4512, -0.5981, -0.6587, -0.9546, 0.3040, 0.6157, -0.5723, + // -0.8438, 0.1544 + VCMP_U16(7, v2, 0xbad4, 0x3600, 0x3a74, 0x3b47, 0x3317, 0x3884, 0x350a, + 0x3738, 0xb8c9, 0xb945, 0xbba3, 0x34dd, 0x38ed, 0xb894, 0xbac0, + 0x30f1); + + VSET(16, e32, m4); + double dscalar_32; + // -0.71056527 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf35e79b); + // -0.13350210, -0.18642496, 0.30152589, -0.62076813, + // 0.00040700, -0.59566921, -0.88075870, 0.08096603, 0.94059193, + // -0.29601631, -0.54263371, -0.86016685, -0.57158113, + // 0.85538357, -0.76839548, 0.28374606 + VLOAD_32(v8, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, + 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, + 0x3e91472a); + asm volatile("vfsgnjx.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.13350210, 0.18642496, -0.30152589, 0.62076813, + // -0.00040700, 0.59566921, 0.88075870, -0.08096603, + // -0.94059193, 0.29601631, 0.54263371, 0.86016685, + // 0.57158113, -0.85538357, 0.76839548, -0.28374606 + VCMP_U32(8, v4, 0x3e08b4c6, 0x3e3ee62f, 0xbe9a619a, 0x3f1eeaa9, 0xb9d561f4, + 0x3f187dc7, 0x3f617967, 0xbda5d185, 0xbf70caa2, 0x3e978f73, + 0x3f0aea0b, 0x3f5c33e5, 0x3f125324, 0xbf5afa6b, 0x3f44b591, + 0xbe91472a); + + VSET(16, e64, m8); + 
double dscalar_64; + // -0.1599292306617626 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); + // -0.3770377828689853, 0.5963307040587882, + // -0.4228346580189990, -0.8395360297727528, 0.2884308755790033, + // -0.9332093226534830, -0.3077793113682024, + // -0.3241690978469995, 0.9848431705043186, 0.5835571766262024, + // 0.6934128987139432, -0.8499240402166686, 0.9392758702585176, + // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 + VLOAD_64(v16, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, + 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, + 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, + 0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, + 0xbfe97eb52b9b5dac); + asm volatile("vfsgnjx.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.3770377828689853, -0.5963307040587882, 0.4228346580189990, + // 0.8395360297727528, -0.2884308755790033, 0.9332093226534830, + // 0.3077793113682024, 0.3241690978469995, -0.9848431705043186, + // -0.5835571766262024, -0.6934128987139432, 0.8499240402166686, + // -0.9392758702585176, -0.8754505566292561, 0.4187493105472220, + // 0.7967172481248119 + VCMP_U64(9, v8, 0x3fd8216314b1d540, 0xbfe3152420f10f90, 0x3fdb0fb918f3a4fc, + 0x3feadd7aa9f60146, 0xbfd275a6c6712e84, 0x3feddcd9cc23cf06, + 0x3fd3b2a7ff2d8ea0, 0x3fd4bf2fbe681ba4, 0xbfef83d5d32028f6, + 0xbfe2ac80199e9490, 0xbfe630703f533af4, 0x3feb3293e69a12ae, + 0xbfee0e8c4515d52c, 0xbfec03b0e2bf9ad6, 0x3fdaccc9e88176a4, + 0x3fe97eb52b9b5dac); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9766 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbbd0); + // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, + // -0.3149, -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, + // -0.6157, 0.5723, 0.8438, -0.1544 + VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 
0xb738, + 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjx.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.3750, 0.0000, 0.9097, 0.0000, 0.5645, 0.0000, + // 0.4512, 0.0000, -0.6587, 0.0000, 0.3040, 0.0000, + // -0.5723, 0.0000, 0.1544 + VCMP_U16(10, v2, 0x0, 0x3600, 0x0, 0x3b47, 0x0, 0x3884, 0x0, 0x3738, 0x0, + 0xb945, 0x0, 0x34dd, 0x0, 0xb894, 0x0, 0x30f1); + + VSET(16, e32, m4); + double dscalar_32; + // -0.71056527 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf35e79b); + // -0.13350210, -0.18642496, 0.30152589, -0.62076813, + // 0.00040700, -0.59566921, -0.88075870, 0.08096603, + // 0.94059193, -0.29601631, -0.54263371, -0.86016685, + // -0.57158113, 0.85538357, -0.76839548, 0.28374606 + VLOAD_32(v8, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, + 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, + 0x3e91472a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjx.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.18642496, 0.00000000, 0.62076813, + // 0.00000000, 0.59566921, 0.00000000, -0.08096603, + // 0.00000000, 0.29601631, 0.00000000, 0.86016685, + // 0.00000000, -0.85538357, 0.00000000, -0.28374606 + VCMP_U32(11, v4, 0x0, 0x3e3ee62f, 0x0, 0x3f1eeaa9, 0x0, 0x3f187dc7, 0x0, + 0xbda5d185, 0x0, 0x3e978f73, 0x0, 0x3f5c33e5, 0x0, 0xbf5afa6b, 0x0, + 0xbe91472a); + + VSET(16, e64, m8); + double dscalar_64; + // -0.1599292306617626 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); + // -0.3770377828689853, 0.5963307040587882, + // -0.4228346580189990, -0.8395360297727528, + // 0.2884308755790033, -0.9332093226534830, + // -0.3077793113682024, -0.3241690978469995, + // 0.9848431705043186, 0.5835571766262024, 0.6934128987139432, + // -0.8499240402166686, 0.9392758702585176, + // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 + VLOAD_64(v16, 
0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, + 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, + 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, + 0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, + 0xbfe97eb52b9b5dac); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjx.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.5963307040587882, 0.0000000000000000, + // 0.8395360297727528, 0.0000000000000000, 0.9332093226534830, + // 0.0000000000000000, 0.3241690978469995, 0.0000000000000000, + // -0.5835571766262024, 0.0000000000000000, + // 0.8499240402166686, 0.0000000000000000, + // -0.8754505566292561, 0.0000000000000000, 0.7967172481248119 + VCMP_U64(12, v8, 0x0, 0xbfe3152420f10f90, 0x0, 0x3feadd7aa9f60146, 0x0, + 0x3feddcd9cc23cf06, 0x0, 0x3fd4bf2fbe681ba4, 0x0, 0xbfe2ac80199e9490, + 0x0, 0x3feb3293e69a12ae, 0x0, 0xbfec03b0e2bf9ad6, 0x0, + 0x3fe97eb52b9b5dac); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c new file mode 100644 index 000000000..9d7b9524f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1() { // Unmasked vfslide1down.vf at e16, e32 and e64 + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(8, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1down.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VCMP_U16(1, v2, 2, 3, 4, 5, 6, 7, 8, 0xbb81); // last element is the scalar + + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(8, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1down.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // Eight elements are checked, as for e16 and e64: 2..8, then the scalar + VCMP_U32(2, v4, 2, 3, 4, 5, 6, 7, 8, 0xbf75e762); + + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(8, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1down.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VCMP_U64(3, v8, 2, 3, 4, 5, 6, 7, 8, 0x3fed25da5d7296fe); +} + +void TEST_CASE2() { // Masked (v0.t) variants; inactive elements keep -1 + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(8, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); // even-indexed elements active + asm volatile("vfslide1down.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(6, v2, 2, -1, 4, -1, 6, -1, 8, -1); + + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + +
VSET(32, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e32, m1); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); // odd-indexed elements active + asm volatile("vfslide1down.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(7, v4, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, + 0xbf75e762); + + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + + VSET(32, e64, m1); + VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e64, m1); + VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); // even-indexed elements active + asm volatile("vfslide1down.vf v1, v2, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(8, v1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); +} + +int main(void) { // calls enable_vec()/enable_fp(), then runs both test cases + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c new file mode 100644 index 000000000..67379b383 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c @@ -0,0 +1,90 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1() { // Unmasked vfslide1up.vf at e16, e32 and e64 + double dscalar_16; + // -0.9380; expected in element 0, followed by source elements 0..14 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1up.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VCMP_U16(1, v2, 0xbb81, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1up.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VCMP_U32(2, v4, 0xbf75e762, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15); + + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vfslide1up.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15); +} + +void TEST_CASE2() { // Masked (v0.t): 0x55 updates even elements, 0xAA odd ones + double dscalar_16; + // -0.9380; element 0 is active under 0x55, so it receives the scalar + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vfslide1up.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(4, v2, 0xbb81, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, + -1); + + double dscalar_32; + // -0.96056187; element 0 is inactive under 0xAA, so the scalar is not written + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + + VSET(16, e32, m4); +
VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfslide1up.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(5, v4, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); // even elements active; element 0 gets the scalar + asm volatile("vfslide1up.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(6, v8, 0x3fed25da5d7296fe, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, + 12, -1, 14, -1); +} + +int main(void) { // calls enable_vec()/enable_fp(), then runs both test cases + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c new file mode 100644 index 000000000..0b62d412b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c @@ -0,0 +1,142 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -4628.000, 5116.000, -9928.000, 9392.000, -140.875, + // 6112.000, 2598.000, 3210.000, 528.000, -3298.000, + // -3674.000, 368.250, 1712.000, -8584.000, -2080.000, + // 4336.000 + VLOAD_16(v2, 0xec85, 0x6cff, 0xf0d9, 0x7096, 0xd867, 0x6df8, 0x6913, 0x6a45, + 0x6020, 0xea71, 0xeb2d, 0x5dc1, 0x66b0, 0xf031, 0xe810, 0x6c3c); + asm volatile("vfsqrt.v v4, v2"); + // nan, 71.500, nan, 96.938, + // nan, 78.188, 50.969, 56.656, 22.984, nan, + // nan, 19.188, 41.375, nan, nan, 65.875 + VCMP_U16(1, v4, 0x7e00, 0x5478, 0x7e00, 0x560e, 0x7e00, 0x54e2, 0x525f, + 0x5315, 0x4dbe, 0x7e00, 0x7e00, 0x4ccc, 0x512c, 0x7e00, 0x7e00, + 0x541d); + + VSET(16, e32, m4); + // 53688.590, -5719.180, -59560.355, -34640.023, -22323.398, + // -52381.586, 19136.160, 13055.238, -68576.781, + // -35066.488, 62475.219, -25604.578, 54705.039, + // -19827.459, 17792.961, -28415.572 + VLOAD_32(v4, 0x4751b897, 0xc5b2b971, 0xc768a85b, 0xc7075006, 0xc6ae66cc, + 0xc74c9d96, 0x46958052, 0x464bfcf4, 0xc785f064, 0xc708fa7d, + 0x47740b38, 0xc6c80928, 0x4755b10a, 0xc69ae6eb, 0x468b01ec, + 0xc6ddff25); + asm volatile("vfsqrt.v v8, v4"); + // 231.708, nan, nan, nan, nan, nan, 138.334, + // 114.260, nan, nan, 249.950, nan, 233.891, nan, + // 133.390, nan + VCMP_U32(2, v8, 0x4367b53e, 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x430a5560, 0x42e484e0, 0x7fc00000, 0x7fc00000, + 0x4379f34f, 0x7fc00000, 0x4369e41e, 0x7fc00000, 0x430563e7, + 0x7fc00000); + + VSET(16, e64, m8); + // -2532126.867, -601715.939, -7176821.248, 9617114.284, + // -4651296.040, -9962642.835, 4027953.647, 7849763.850, + // -9544132.585, -8682313.823, 7018932.012, 639358.130, + // -7598169.215, -9585529.793, -4604984.668, 314584.590 + VLOAD_64(v8, 0xc143518f6efce4ae, 
0xc1225ce7e096cbf0, 0xc15b609d4fd8b968, + 0x416257db4912ef24, 0xc151be4802974a67, 0xc16300925abc1630, + 0x414ebb18d2c34030, 0x415df1c8f662a87c, 0xc162343892b8d28c, + 0xc1608f693a52837e, 0x415ac66d00c810d8, 0x412382fc427c96a0, + 0xc15cfc164dc9e320, 0xc162486f39607ee9, 0xc151910e2ac0e818, + 0x411333625c861bc0); + asm volatile("vfsqrt.v v16, v8"); + // nan, nan, nan, 3101.147, nan, nan, 2006.976, + // 2801.743, nan, nan, 2649.327, 799.599, nan, nan, + // nan, 560.878 + VCMP_U64(3, v16, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, + 0x40a83a4b64b82189, 0x7ff8000000000000, 0x7ff8000000000000, + 0x409f5be7acad5998, 0x40a5e37c6ac52c2f, 0x7ff8000000000000, + 0x7ff8000000000000, 0x40a4b2a7466e763d, 0x4088fcca333ab72d, + 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, + 0x40818706fb9cc11b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -4628.000, 5116.000, -9928.000, 9392.000, -140.875, + // 6112.000, 2598.000, 3210.000, 528.000, -3298.000, + // -3674.000, 368.250, 1712.000, -8584.000, -2080.000, + // 4336.000 + VLOAD_16(v2, 0xec85, 0x6cff, 0xf0d9, 0x7096, 0xd867, 0x6df8, 0x6913, 0x6a45, + 0x6020, 0xea71, 0xeb2d, 0x5dc1, 0x66b0, 0xf031, 0xe810, 0x6c3c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsqrt.v v4, v2, v0.t"); + // 0.000, 71.500, 0.000, 96.938, 0.000, 78.188, + // 0.000, 56.656, 0.000, nan, 0.000, 19.188, 0.000, + // nan, 0.000, 65.875 + VCMP_U16(4, v4, 0x0, 0x5478, 0x0, 0x560e, 0x0, 0x54e2, 0x0, 0x5315, 0x0, + 0x7e00, 0x0, 0x4ccc, 0x0, 0x7e00, 0x0, 0x541d); + + VSET(16, e32, m4); + // 53688.590, -5719.180, -59560.355, -34640.023, -22323.398, + // -52381.586, 19136.160, 13055.238, -68576.781, + // -35066.488, 62475.219, -25604.578, 54705.039, + // -19827.459, 17792.961, -28415.572 + VLOAD_32(v4, 0x4751b897, 0xc5b2b971, 0xc768a85b, 0xc7075006, 0xc6ae66cc, + 0xc74c9d96, 0x46958052, 0x464bfcf4, 0xc785f064, 0xc708fa7d, + 
0x47740b38, 0xc6c80928, 0x4755b10a, 0xc69ae6eb, 0x468b01ec, + 0xc6ddff25); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsqrt.v v8, v4, v0.t"); + // 0.000, nan, 0.000, nan, 0.000, nan, 0.000, + // 114.260, 0.000, nan, 0.000, nan, 0.000, nan, + // 0.000, nan + VCMP_U32(5, v8, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, + 0x42e484e0, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, + 0x7fc00000); + + VSET(16, e64, m8); + // -2532126.867, -601715.939, -7176821.248, 9617114.284, + // -4651296.040, -9962642.835, 4027953.647, 7849763.850, + // -9544132.585, -8682313.823, 7018932.012, 639358.130, + // -7598169.215, -9585529.793, -4604984.668, 314584.590 + VLOAD_64(v8, 0xc143518f6efce4ae, 0xc1225ce7e096cbf0, 0xc15b609d4fd8b968, + 0x416257db4912ef24, 0xc151be4802974a67, 0xc16300925abc1630, + 0x414ebb18d2c34030, 0x415df1c8f662a87c, 0xc162343892b8d28c, + 0xc1608f693a52837e, 0x415ac66d00c810d8, 0x412382fc427c96a0, + 0xc15cfc164dc9e320, 0xc162486f39607ee9, 0xc151910e2ac0e818, + 0x411333625c861bc0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vfsqrt.v v16, v8, v0.t"); + // 0.000, nan, 0.000, 3101.147, 0.000, nan, 0.000, + // 2801.743, 0.000, nan, 0.000, 799.599, 0.000, nan, + // 0.000, 560.878 + VCMP_U64(6, v16, 0x0, 0x7ff8000000000000, 0x0, 0x40a83a4b64b82189, 0x0, + 0x7ff8000000000000, 0x0, 0x40a5e37c6ac52c2f, 0x0, 0x7ff8000000000000, + 0x0, 0x4088fcca333ab72d, 0x0, 0x7ff8000000000000, 0x0, + 0x40818706fb9cc11b); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... 
+ CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c new file mode 100644 index 000000000..292ee9194 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c @@ -0,0 +1,349 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, + // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, + // 0.2333, -0.0269 + VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); + // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, + // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, + // 0.1448, 0.6606 + VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); + asm volatile("vfsub.vv v2, v4, v6"); + // 0.1128, 1.5840, 0.9624, 1.6055, -1.2285, 0.8633, 0.4412, + // -0.9346, 1.1816, -1.5508, 1.3711, 0.5342, -0.2034, + // -0.4673, 0.0885, -0.6875 + VCMP_U16(1, v2, 0x2f38, 0x3e56, 0x3bb3, 0x3e6c, 0xbcea, 0x3ae8, 0x370f, + 0xbb7a, 0x3cba, 0xbe34, 0x3d7c, 0x3846, 0xb282, 0xb77a, 0x2daa, + 0xb980); + + VSET(16, e32, m4); + // -0.12869358, 0.96847999, -0.85811919, -0.21122381, + // -0.05195865, 0.43910158, 0.86828148, -0.90407801, + // 0.62089461, -0.65907389, 0.91886526, -0.57595438, + // -0.35377914, -0.26657876, 0.49153560, 0.42637765 + VLOAD_32(v8, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 
0x3f1ef2f3, 0xbf28b911, + 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, + 0x3eda4e2c); + // -0.50821143, -0.56901741, -0.88642830, 0.91128469, + // -0.00441748, 0.72763014, 0.81834352, -0.49977919, + // -0.94507313, -0.60766727, 0.21069343, 0.35644454, + // -0.51639801, -0.74812186, -0.97028691, 0.42650157 + VLOAD_32(v12, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, + 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, + 0x3eda5e6a); + asm volatile("vfsub.vv v4, v8, v12"); + // 0.37951785, 1.53749740, 0.02830911, -1.12250853, + // -0.04754117, -0.28852856, 0.04993796, + // -0.40429881, 1.56596780, -0.05140662, 0.70817184, + // -0.93239892, 0.16261888, 0.48154309, 1.46182251, + // -0.00012392 + VCMP_U32(2, v4, 0x3ec2502a, 0x3fc4ccb7, 0x3ce7e880, 0xbf8fae5c, 0xbd42ba88, + 0xbe93ba04, 0x3d4c8bc0, 0xbecf0041, 0x3fc871a2, 0xbd528fc0, + 0x3f354ac0, 0xbf6eb1b2, 0x3e268590, 0x3ef68cd1, 0x3fbb1d00, + 0xb901f000); + + VSET(16, e64, m8); + // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, + // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, + // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, + // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, + // 0.5349827137885166, -0.0621174552558810, -0.8122743533756343, + // -0.8908485518923974 + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, + 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, + 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, + 0x3fe11e941174b448, 0xbfafcddbedab64a0, 0xbfe9fe26c8e417ba, + 0xbfec81d4d2822346); + // -0.6041772411195545, 0.1691588460867453, -0.3855578735230800, + // -0.9206749118255901, 0.7025181961160538, -0.9905598942344518, + // 0.9510997049380876, 0.2754176494545910, 0.5271936205102918, + // 0.8778238674058336, 0.9294006140978470, 
-0.8775508592745904, + // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, + // -0.1530785884604509 + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, + 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, + 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, + 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, + 0xbfc398144593e6c0); + asm volatile("vfsub.vv v8, v16, v24"); + // 0.0988415758481911, -0.7983443408145550, 1.0036837449172462, + // 0.3109421032890554, 0.1935501104197752, 1.1139424835327358, + // -1.6582643174207199, -0.9537510803764819, + // -0.1738934718442910, -0.4045587367936121, + // -1.6629086966767983, -0.0520992221130601, + // -0.2122565520976816, 0.3258863727237562, + // -0.1639036535972689, -0.7377699634319466 + VCMP_U64(3, v8, 0x3fb94dae77c0ca20, 0xbfe98c096e57d89c, 0x3ff00f16afbf245e, + 0x3fd3e679b524dc50, 0x3fc8c6400131ef90, 0x3ff1d2b55a865eb2, + 0xbffa88402a3721d0, 0xbfee8520fc57bba8, 0xbfc642242b479178, + 0xbfd9e44a53f5bc98, 0xbffa9b4626475f92, 0xbfaaacbfce3ec9c0, + 0xbfcb2b3902cf8588, 0x3fd4db528443e0dc, 0xbfc4facb7ff29558, + 0xbfe79bcfc11d2996); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, + // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, + // 0.2333, -0.0269 + VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); + // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, + // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, + // 0.1448, 0.6606 + VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); + VLOAD_8(v0, 0xAA, 0xAA); + 
VCLEAR(v2); + asm volatile("vfsub.vv v2, v4, v6, v0.t"); + // 0.0000, 1.5840, 0.0000, 1.6055, 0.0000, 0.8633, 0.0000, + // -0.9346, 0.0000, -1.5508, 0.0000, 0.5342, 0.0000, + // -0.4673, 0.0000, -0.6875 + VCMP_U16(4, v2, 0x0, 0x3e56, 0x0, 0x3e6c, 0x0, 0x3ae8, 0x0, 0xbb7a, 0x0, + 0xbe34, 0x0, 0x3846, 0x0, 0xb77a, 0x0, 0xb980); + + VSET(16, e32, m4); + // -0.12869358, 0.96847999, -0.85811919, -0.21122381, + // -0.05195865, 0.43910158, 0.86828148, -0.90407801, + // 0.62089461, -0.65907389, 0.91886526, -0.57595438, + // -0.35377914, -0.26657876, 0.49153560, 0.42637765 + VLOAD_32(v8, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 0x3f1ef2f3, 0xbf28b911, + 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, + 0x3eda4e2c); + // -0.50821143, -0.56901741, -0.88642830, 0.91128469, + // -0.00441748, 0.72763014, 0.81834352, -0.49977919, + // -0.94507313, -0.60766727, 0.21069343, 0.35644454, + // -0.51639801, -0.74812186, -0.97028691, 0.42650157 + VLOAD_32(v12, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, + 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, + 0x3eda5e6a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsub.vv v4, v8, v12, v0.t"); + // 0.00000000, 1.53749740, 0.00000000, -1.12250853, + // 0.00000000, -0.28852856, 0.00000000, -0.40429881, + // 0.00000000, -0.05140662, 0.00000000, -0.93239892, + // 0.00000000, 0.48154309, 0.00000000, -0.00012392 + VCMP_U32(5, v4, 0x0, 0x3fc4ccb7, 0x0, 0xbf8fae5c, 0x0, 0xbe93ba04, 0x0, + 0xbecf0041, 0x0, 0xbd528fc0, 0x0, 0xbf6eb1b2, 0x0, 0x3ef68cd1, 0x0, + 0xb901f000); + + VSET(16, e64, m8); + // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, + // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, + // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, + // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, + // 0.5349827137885166, 
-0.0621174552558810, -0.8122743533756343, + // -0.8908485518923974 + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, + 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, + 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, + 0x3fe11e941174b448, 0xbfafcddbedab64a0, 0xbfe9fe26c8e417ba, + 0xbfec81d4d2822346); + // -0.6041772411195545, 0.1691588460867453, -0.3855578735230800, + // -0.9206749118255901, 0.7025181961160538, -0.9905598942344518, + // 0.9510997049380876, 0.2754176494545910, 0.5271936205102918, + // 0.8778238674058336, 0.9294006140978470, -0.8775508592745904, + // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, + // -0.1530785884604509 + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, + 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, + 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, + 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, + 0xbfc398144593e6c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsub.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.7983443408145550, 0.0000000000000000, + // 0.3109421032890554, 0.0000000000000000, 1.1139424835327358, + // 0.0000000000000000, -0.9537510803764819, 0.0000000000000000, + // -0.4045587367936121, 0.0000000000000000, + // -0.0520992221130601, 0.0000000000000000, 0.3258863727237562, + // 0.0000000000000000, -0.7377699634319466 + VCMP_U64(6, v8, 0x0, 0xbfe98c096e57d89c, 0x0, 0x3fd3e679b524dc50, 0x0, + 0x3ff1d2b55a865eb2, 0x0, 0xbfee8520fc57bba8, 0x0, 0xbfd9e44a53f5bc98, + 0x0, 0xbfaaacbfce3ec9c0, 0x0, 0x3fd4db528443e0dc, 0x0, + 0xbfe79bcfc11d2996); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, + // -0.7539, 0.3875, -0.2002, 0.2168, 
-0.1055, -0.4348, 0.9795, + // 0.3650, 0.5171 + VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); + double dscalar_16; + // -0.8667 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbaef); + asm volatile("vfsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 1.8398, 1.6348, 0.9541, 0.4141, 0.7510, 1.4883, 1.6279, + // 0.1128, 1.2539, 0.6665, 1.0840, 0.7612, + // 0.4319, 1.8457, 1.2314, 1.3838 + VCMP_U16(7, v2, 0x3f5c, 0x3e8a, 0x3ba2, 0x36a0, 0x3a02, 0x3df4, 0x3e83, + 0x2f38, 0x3d04, 0x3955, 0x3c56, 0x3a17, 0x36e9, 0x3f62, 0x3ced, + 0x3d89); + + VSET(16, e32, m4); + // 0.85933530, -0.31821987, 0.18340160, -0.58902484, + // -0.83326858, -0.98716992, -0.74268776, -0.50486410, + // 0.91496444, -0.46108878, -0.75265163, -0.17853038, + // 0.09909800, -0.22828153, 0.31248060, 0.70940411 + VLOAD_32(v8, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, + 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, + 0x3f359b82); + double dscalar_32; + // -0.16449618 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe2871b0); + asm volatile("vfsub.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 1.02383149, -0.15372369, 0.34789777, -0.42452866, + // -0.66877240, -0.82267374, -0.57819158, + // -0.34036791, 1.07946062, -0.29659259, -0.58815545, + // -0.01403420, 0.26359418, -0.06378534, 0.47697678, + // 0.87390029 + VCMP_U32(8, v4, 0x3f830ce9, 0xbe1d69be, 0x3eb21fa8, 0xbed95bd2, 0xbf2b34ab, + 0xbf529abf, 0xbf14045d, 0xbeae44b4, 0x3f8a2bc4, 0xbe97dafc, + 0xbf16915b, 0xbc65efb0, 0x3e86f5d1, 0xbd82a1e4, 0x3ef4364d, + 0x3f5fb7ee); + + VSET(16, e64, m8); + // -0.5270370833343294, -0.3892108170289901, 0.3278104985181656, + // 0.8978904717616114, 0.2838388271052681, 0.1890152734369528, + // -0.5587120809764872, 0.0329118609761476, 0.2661042157694802, + // 0.4284631655495406, 0.3525175873513684, -0.7218762878034530, + // -0.1902187411555145, 
0.0621279131630217, + // -0.3175600204168794, 0.2653267716685161 + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, + 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, + 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, + 0xbfc85916742cb360, 0x3fafcf3ad6686660, 0xbfd452e7438d4924, + 0x3fd0fb1d23c47348); + double dscalar_64; + // -0.3447987329466446 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd6112eb46d5120); + asm volatile("vfsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.1822383503876848, -0.0444120840823454, + // 0.6726092314648102, 1.2426892047082561, 0.6286375600519127, + // 0.5338140063835974, -0.2139133480298425, 0.3777105939227923, + // 0.6109029487161248, 0.7732618984961852, 0.6973163202980130, + // -0.3770775548568084, 0.1545799917911301, 0.4069266461096663, + // 0.0272387125297653, 0.6101255046151608 + VCMP_U64(9, v8, 0xbfc75396157efcf8, 0xbfa6bd2e415c9bc0, 0x3fe58603cb842136, + 0x3ff3e20e13550700, 0x3fe41dcc842eb9f8, 0x3fe115011c720ea2, + 0xbfcb618338fba730, 0x3fd82c690e101280, 0x3fe38c84573880ca, + 0x3fe8be8fbca9166c, 0x3fe6506a50d4b472, 0xbfd82209e58a99d0, + 0x3fc3c946f4adeee0, 0x3fda0b160f3a5dec, 0x3f9be4770e007fc0, + 0x3fe38625ec18e234); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, + // -0.7539, 0.3875, -0.2002, 0.2168, -0.1055, -0.4348, + // 0.9795, 0.3650, 0.5171 + VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); + double dscalar_16; + // -0.8667 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbaef); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsub.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 1.6348, 0.0000, 0.4141, 0.0000, 1.4883, 0.0000, + // 0.1128, 0.0000, 0.6665, 0.0000, 0.7612, 0.0000, 
1.8457, + // 0.0000, 1.3838 + VCMP_U16(10, v2, 0x0, 0x3e8a, 0x0, 0x36a0, 0x0, 0x3df4, 0x0, 0x2f38, 0x0, + 0x3955, 0x0, 0x3a17, 0x0, 0x3f62, 0x0, 0x3d89); + + VSET(16, e32, m4); + // 0.85933530, -0.31821987, 0.18340160, -0.58902484, + // -0.83326858, -0.98716992, -0.74268776, -0.50486410, + // 0.91496444, -0.46108878, -0.75265163, -0.17853038, + // 0.09909800, -0.22828153, 0.31248060, 0.70940411 + VLOAD_32(v8, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, + 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, + 0x3f359b82); + double dscalar_32; + // -0.16449618 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe2871b0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsub.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.15372369, 0.00000000, -0.42452866, + // 0.00000000, -0.82267374, 0.00000000, -0.34036791, + // 0.00000000, -0.29659259, 0.00000000, -0.01403420, + // 0.00000000, -0.06378534, 0.00000000, 0.87390029 + VCMP_U32(11, v4, 0x0, 0xbe1d69be, 0x0, 0xbed95bd2, 0x0, 0xbf529abf, 0x0, + 0xbeae44b4, 0x0, 0xbe97dafc, 0x0, 0xbc65efb0, 0x0, 0xbd82a1e4, 0x0, + 0x3f5fb7ee); + + VSET(16, e64, m8); + // -0.5270370833343294, -0.3892108170289901, + // 0.3278104985181656, 0.8978904717616114, 0.2838388271052681, + // 0.1890152734369528, -0.5587120809764872, 0.0329118609761476, + // 0.2661042157694802, 0.4284631655495406, 0.3525175873513684, + // -0.7218762878034530, -0.1902187411555145, + // 0.0621279131630217, -0.3175600204168794, 0.2653267716685161 + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, + 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, + 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, + 0xbfc85916742cb360, 0x3fafcf3ad6686660, 0xbfd452e7438d4924, + 0x3fd0fb1d23c47348); + double dscalar_64; + // -0.3447987329466446 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 
0xbfd6112eb46d5120); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.0444120840823454, 0.0000000000000000, + // 1.2426892047082561, 0.0000000000000000, 0.5338140063835974, + // 0.0000000000000000, 0.3777105939227923, 0.0000000000000000, + // 0.7732618984961852, 0.0000000000000000, + // -0.3770775548568084, 0.0000000000000000, + // 0.4069266461096663, 0.0000000000000000, 0.6101255046151608 + VCMP_U64(12, v8, 0x0, 0xbfa6bd2e415c9bc0, 0x0, 0x3ff3e20e13550700, 0x0, + 0x3fe115011c720ea2, 0x0, 0x3fd82c690e101280, 0x0, 0x3fe8be8fbca9166c, + 0x0, 0xbfd82209e58a99d0, 0x0, 0x3fda0b160f3a5dec, 0x0, + 0x3fe38625ec18e234); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c new file mode 100644 index 000000000..53d9bdcb2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c @@ -0,0 +1,531 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, + // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, + // -61.0312, 5.1641, 48.7500, -25.6250 + VLOAD_16(v4, 0x4944, 0x54fa, 0x4a25, 0x5598, 0x53d3, 0x52b2, 0xd0ab, 0xd20c, + 0x522b, 0x3c97, 0xd4fa, 0x5280, 0xd3a1, 0x452a, 0x5218, 0xce68); + // 68.1875, + // -7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, + // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, + // -55.5312, 2.1875 + VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); + asm volatile("vfwadd.vv v8, v4, v6"); + // 78.71875000, 72.55078125, 32.55468750, 161.81250000, + // 151.15625000, 89.56250000, 59.28125000, 22.06250000, + // -38.34375000, 88.83496094, -70.83593750, 33.10937500, + // -101.18750000, 94.03906250, -6.78125000, -23.43750000 + VCMP_U32(1, v8, 0x429d7000, 0x42911a00, 0x42023800, 0x4321d000, 0x43172800, + 0x42b32000, 0x426d2000, 0x41b08000, 0xc2196000, 0x42b1ab80, + 0xc28dac00, 0x42047000, 0xc2ca6000, 0x42bc1400, 0xc0d90000, + 0xc1bb8000); + + VSET(16, e32, m4); + // -35386.17187500, -52670.69531250, 69391.31250000, + // 3219.84130859, 74596.35156250, -45488.69921875, + // 6598.72949219, 20221.24609375, 75105.62500000, + // 67354.82031250, 32844.19140625, -73470.18750000, + // -28377.67382812, 61998.10937500, 24691.54296875, + // -29873.45507812 + VLOAD_32(v8, 0xc70a3a2c, 0xc74dbeb2, 0x478787a8, 0x45493d76, 0x4791b22d, + 0xc731b0b3, 0x45ce35d6, 0x469dfa7e, 0x4792b0d0, 0x47838d69, + 0x47004c31, 0xc78f7f18, 0xc6ddb359, 0x47722e1c, 0x46c0e716, + 0xc6e962e9); + // -90937.21875000, -72297.07031250, 18867.19531250, + // -516.01525879, -13301.11425781, 
85173.41406250, + // -32079.35546875, -23770.60937500, 39295.43359375, + // 38042.19140625, -61343.24218750, 76844.01562500, + // 26642.50390625, 91893.05468750, 88349.72656250, + // 29134.96093750 + VLOAD_32(v12, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, + 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, + 0x46e39dec); + asm volatile("vfwadd.vv v16, v8, v12"); + // -126323.3906250000000000, -124967.7656250000000000, + // 88258.5078125000000000, 2703.8260498046875000, + // 61295.2373046875000000, 39684.7148437500000000, + // -25480.6259765625000000, -3549.3632812500000000, + // 114401.0585937500000000, 105397.0117187500000000, + // -28499.0507812500000000, 3373.8281250000000000, + // -1735.1699218750000000, 153891.1640625000000000, + // 113041.2695312500000000, -738.4941406250000000 + VCMP_U64(2, v16, 0xc0fed73640000000, 0xc0fe827c40000000, 0x40f58c2820000000, + 0x40a51fa6f0000000, 0x40edede798000000, 0x40e36096e0000000, + 0xc0d8e22810000000, 0xc0abbaba00000000, 0x40fbee10f0000000, + 0x40f9bb5030000000, 0xc0dbd4c340000000, 0x40aa5ba800000000, + 0xc09b1cae00000000, 0x4102c91950000000, 0x40fb991450000000, + 0xc08713f400000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, + // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, + // -61.0312, 5.1641, 48.7500, -25.6250 + VLOAD_16(v4, 0x4944, 0x54fa, 0x4a25, 0x5598, 0x53d3, 0x52b2, 0xd0ab, 0xd20c, + 0x522b, 0x3c97, 0xd4fa, 0x5280, 0xd3a1, 0x452a, 0x5218, 0xce68); + // 68.1875, + // -7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, + // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, + // -55.5312, 2.1875 + VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); 
+ VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.vv v8, v4, v6, v0.t"); + // 0.00000000, 72.55078125, 0.00000000, 161.81250000, + // 0.00000000, 89.56250000, 0.00000000, 22.06250000, + // 0.00000000, 88.83496094, 0.00000000, 33.10937500, + // 0.00000000, 94.03906250, 0.00000000, -23.43750000 + VCMP_U32(3, v8, 0x0, 0x42911a00, 0x0, 0x4321d000, 0x0, 0x42b32000, 0x0, + 0x41b08000, 0x0, 0x42b1ab80, 0x0, 0x42047000, 0x0, 0x42bc1400, 0x0, + 0xc1bb8000); + + VSET(16, e32, m4); + // -35386.17187500, -52670.69531250, 69391.31250000, + // 3219.84130859, 74596.35156250, -45488.69921875, + // 6598.72949219, 20221.24609375, 75105.62500000, + // 67354.82031250, 32844.19140625, -73470.18750000, + // -28377.67382812, 61998.10937500, 24691.54296875, + // -29873.45507812 + VLOAD_32(v8, 0xc70a3a2c, 0xc74dbeb2, 0x478787a8, 0x45493d76, 0x4791b22d, + 0xc731b0b3, 0x45ce35d6, 0x469dfa7e, 0x4792b0d0, 0x47838d69, + 0x47004c31, 0xc78f7f18, 0xc6ddb359, 0x47722e1c, 0x46c0e716, + 0xc6e962e9); + // -90937.21875000, -72297.07031250, 18867.19531250, + // -516.01525879, -13301.11425781, 85173.41406250, + // -32079.35546875, -23770.60937500, 39295.43359375, + // 38042.19140625, -61343.24218750, 76844.01562500, + // 26642.50390625, 91893.05468750, 88349.72656250, + // 29134.96093750 + VLOAD_32(v12, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, + 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, + 0x46e39dec); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.vv v16, v8, v12, v0.t"); + // 0.0000000000000000, -124967.7656250000000000, + // 0.0000000000000000, 2703.8260498046875000, + // 0.0000000000000000, 39684.7148437500000000, + // 0.0000000000000000, -3549.3632812500000000, + // 0.0000000000000000, 105397.0117187500000000, + // 0.0000000000000000, 3373.8281250000000000, + // 0.0000000000000000, 153891.1640625000000000, + // 0.0000000000000000, 
-738.4941406250000000 + VCMP_U64(4, v16, 0x0, 0xc0fe827c40000000, 0x0, 0x40a51fa6f0000000, 0x0, + 0x40e36096e0000000, 0x0, 0xc0abbaba00000000, 0x0, 0x40f9bb5030000000, + 0x0, 0x40aa5ba800000000, 0x0, 0x4102c91950000000, 0x0, + 0xc08713f400000000); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 12.5859 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x4a4b); + // 2.1094, 59.2188, 89.4375, 75.2500, -4.0742, 64.7500, + // -82.9375, -84.9375, -48.0625, + // -6.1016, 29.9688, 10.6172, 52.5938, -8.5000, 70.5000, + // -83.6875 + VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, + 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); + asm volatile("vfwadd.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 14.69531250, 71.80468750, + // 102.02343750, 87.83593750, 8.51171875, 77.33593750, + // -70.35156250, -72.35156250, + // -35.47656250, 6.48437500, 42.55468750, 23.20312500, 65.17968750, + // 4.08593750, 83.08593750, -71.10156250 + VCMP_U32(5, v8, 0x416b2000, 0x428f9c00, 0x42cc0c00, 0x42afac00, 0x41083000, + 0x429aac00, 0xc28cb400, 0xc290b400, 0xc20de800, 0x40cf8000, + 0x422a3800, 0x41b9a000, 0x42825c00, 0x4082c000, 0x42a62c00, + 0xc28e3400); + + VSET(16, e32, m4); + double dscalar_32; + // -497871.25000000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8f319e8); + // -568507.00000000, 457995.96875000, 409553.18750000, + // -797866.87500000, 263520.50000000, 290072.90625000, + // 530785.75000000, 226941.43750000, 285558.65625000, + // -987117.31250000, -423428.21875000, -140664.31250000, + // -844580.25000000, 549948.50000000, 304879.87500000, + // -655518.43750000 + VLOAD_32(v8, 0xc90acbb0, 0x48dfa17f, 0x48c7fa26, 0xc942caae, 0x4880ac10, + 0x488da31d, 0x4901961c, 0x485d9f5c, 0x488b6ed5, 0xc970fed5, + 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, + 0xc92009e7); + asm volatile("vfwadd.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1066378.2500000000000000, 
-39875.2812500000000000, + // -88318.0625000000000000, -1295738.1250000000000000, + // -234350.7500000000000000, -207798.3437500000000000, + // 32914.5000000000000000, -270929.8125000000000000, + // -212312.5937500000000000, -1484988.5625000000000000, + // -921299.4687500000000000, -638535.5625000000000000, + // -1342451.5000000000000000, 52077.2500000000000000, + // -192991.3750000000000000, -1153389.6875000000000000 + VCMP_U64(6, v16, 0xc130458a40000000, 0xc0e3786900000000, 0xc0f58fe100000000, + 0xc133c57a20000000, 0xc10c9b7600000000, 0xc1095db2c0000000, + 0x40e0125000000000, 0xc110894740000000, 0xc109eac4c0000000, + 0xc136a8bc90000000, 0xc12c1da6f0000000, 0xc1237c8f20000000, + 0xc1347bf380000000, 0x40e96da800000000, 0xc1078efb00000000, + 0xc131996db0000000); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 12.5859 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x4a4b); + // 2.1094, 59.2188, 89.4375, 75.2500, -4.0742, 64.7500, + // -82.9375, -84.9375, -48.0625, + // -6.1016, 29.9688, 10.6172, 52.5938, -8.5000, 70.5000, + // -83.6875 + VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, + 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 71.80468750, 0.00000000, 87.83593750, + // 0.00000000, 77.33593750, 0.00000000, -72.35156250, + // 0.00000000, 6.48437500, 0.00000000, 23.20312500, + // 0.00000000, 4.08593750, 0.00000000, -71.10156250 + VCMP_U32(7, v8, 0x0, 0x428f9c00, 0x0, 0x42afac00, 0x0, 0x429aac00, 0x0, + 0xc290b400, 0x0, 0x40cf8000, 0x0, 0x41b9a000, 0x0, 0x4082c000, 0x0, + 0xc28e3400); + + VSET(16, e32, m4); + double dscalar_32; + // -497871.25000000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8f319e8); + // -568507.00000000, 457995.96875000, 409553.18750000, + // -797866.87500000, 263520.50000000, 
290072.90625000, + // 530785.75000000, 226941.43750000, 285558.65625000, + // -987117.31250000, -423428.21875000, -140664.31250000, + // -844580.25000000, 549948.50000000, 304879.87500000, + // -655518.43750000 + VLOAD_32(v8, 0xc90acbb0, 0x48dfa17f, 0x48c7fa26, 0xc942caae, 0x4880ac10, + 0x488da31d, 0x4901961c, 0x485d9f5c, 0x488b6ed5, 0xc970fed5, + 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, + 0xc92009e7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, -39875.2812500000000000, + // 0.0000000000000000, -1295738.1250000000000000, + // 0.0000000000000000, -207798.3437500000000000, + // 0.0000000000000000, -270929.8125000000000000, + // 0.0000000000000000, -1484988.5625000000000000, + // 0.0000000000000000, -638535.5625000000000000, + // 0.0000000000000000, 52077.2500000000000000, + // 0.0000000000000000, -1153389.6875000000000000 + VCMP_U64(8, v16, 0x0, 0xc0e3786900000000, 0x0, 0xc133c57a20000000, 0x0, + 0xc1095db2c0000000, 0x0, 0xc110894740000000, 0x0, 0xc136a8bc90000000, + 0x0, 0xc1237c8f20000000, 0x0, 0x40e96da800000000, 0x0, + 0xc131996db0000000); +}; + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -4.22890615, 94.35176849, -2.66183305, 81.53971863, + // -30.80995941, -61.45680237, 53.70308304, 26.43629074, + // -50.49792862, 12.57134342, -18.77090454, -0.50017655, + // -33.71574402, 14.64656544, 89.57390594, 94.32437897 + VLOAD_32(v4, 0xc0875333, 0x42bcb41b, 0xc02a5b79, 0x42a31456, 0xc1f67acc, + 0xc275d3c4, 0x4256cff5, 0x41d37d86, 0xc249fde1, 0x41492439, + 0xc1962ad0, 0xbf000b92, 0xc206dcec, 0x416a5855, 0x42b325d7, + 0x42bca615); + // -38.1875, + // -22.7500, 51.3438, 8.8594, 23.0938, 32.6875, 71.7500, 7.6758, + // -12.8047, 98.0625, -8.7734, -73.5625, + // -59.0312, 44.9688, 63.8438, 30.1406 + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + 0xca67, 0x5621, 0xc863, 
0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); + asm volatile("vfwadd.wv v12, v4, v8"); + // -42.41640472, 71.60176849, 48.68191528, 90.39909363, + // -7.71620941, -28.76930237, 125.45307922, 34.11207199, + // -63.30261612, 110.63384247, -27.54434204, -74.06267548, + // -92.74699402, 59.61531448, 153.41766357, 124.46500397 + VCMP_U32(9, v12, 0xc229aa66, 0x428f341b, 0x4242ba48, 0x42b4cc56, 0xc0f6eb30, + 0xc1e62788, 0x42fae7fa, 0x420872c3, 0xc27d35e1, 0x42dd4487, + 0xc1dc5ad0, 0xc2942017, 0xc2b97e76, 0x426e7615, 0x43196aec, + 0x42f8ee15); + + VSET(16, e32, m4); + // -55997.9824854041071376, 64501.1750668793683872, + // -29542.1742966430028901, -97235.1376669598394074, + // -76290.1568635256844573, -53719.7602741207738291, + // -71738.9507989753619768, 76087.4621994893532246, + // 88201.1680542646208778, -10676.6526112916035345, + // -87188.1793410585087258, 19855.3190320774738211, + // 17509.5892884960630909, 30185.4023848686192650, + // 54203.3669640090665780, -57884.3948619379953016 + VLOAD_64(v8, 0xc0eb57bf70853aee, 0x40ef7ea59a25db30, 0xc0dcd98b27ad1b60, + 0xc0f7bd3233e24524, 0xc0f2a0228283540c, 0xc0ea3af8542a6497, + 0xc0f183af3678fc84, 0x40f29377652b4ab0, 0x40f58892b059ab28, + 0xc0c4da5388c44d38, 0xc0f54942de94bad2, 0x40d363d46b0584c8, + 0x40d11965b6e718a0, 0x40dd7a59c0ac76c8, 0x40ea776bbe2b4e38, + 0xc0ec438ca2b580c4); + // -73536.78125000, 57454.64062500, -32693.95507812, + // -56205.09375000, 12513.70898438, 20858.82226562, + // 12284.94335938, 61625.35156250, -11893.36132812, + // -46430.30078125, 30247.85937500, -94111.64843750, + // 89016.30468750, -52090.74609375, 72764.65625000, + // -47109.86328125 + VLOAD_32(v16, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, + 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, + 0xc73805dd); + asm volatile("vfwadd.wv v24, v8, v16"); + // -129534.7637354041071376, 121955.8156918793683872, + // -62236.1293747680028901, -153440.2314169598394074, 
+ // -63776.4478791506844573, -32860.9380084957738291, + // -59454.0074396003619768, 137712.8137619893532246, + // 76307.8067261396208778, -57106.9533925416035345, + // -56940.3199660585087258, -74256.3294054225261789, + // 106525.8939759960630909, -21905.3437088813807350, + // 126968.0232140090665780, -104994.2581431879953016 + VCMP_U64(10, v24, 0xc0ff9fec38429d77, 0x40fdc63d0d12ed98, 0xc0ee638423d68db0, + 0xc102bb01d9f12292, 0xc0ef240e5506a818, 0xc0e00b9e042a6497, + 0xc0ed07c03cf1f908, 0x4100cf868295a558, 0x40f2a13ce859ab28, + 0xc0ebe25e8231134e, 0xc0ebcd8a3d2975a4, 0xc0f22105453e9ece, + 0x40fa01de4db9c628, 0xc0d56455ff538938, 0x40feff805f15a71c, + 0xc0f9a224215ac062); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -4.22890615, 94.35176849, -2.66183305, 81.53971863, + // -30.80995941, -61.45680237, 53.70308304, 26.43629074, + // -50.49792862, 12.57134342, -18.77090454, -0.50017655, + // -33.71574402, 14.64656544, 89.57390594, 94.32437897 + VLOAD_32(v4, 0xc0875333, 0x42bcb41b, 0xc02a5b79, 0x42a31456, 0xc1f67acc, + 0xc275d3c4, 0x4256cff5, 0x41d37d86, 0xc249fde1, 0x41492439, + 0xc1962ad0, 0xbf000b92, 0xc206dcec, 0x416a5855, 0x42b325d7, + 0x42bca615); + // -38.1875, + // -22.7500, 51.3438, 8.8594, 23.0938, 32.6875, 71.7500, 7.6758, + // -12.8047, 98.0625, -8.7734, -73.5625, + // -59.0312, 44.9688, 63.8438, 30.1406 + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + 0xca67, 0x5621, 0xc863, 0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwadd.wv v12, v4, v8, v0.t"); + // 0.00000000, 71.60176849, 0.00000000, 90.39909363, + // 0.00000000, -28.76930237, 0.00000000, 34.11207199, + // 0.00000000, 110.63384247, 0.00000000, -74.06267548, + // 0.00000000, 59.61531448, 0.00000000, 124.46500397 + VCMP_U32(11, v12, 0x0, 0x428f341b, 0x0, 0x42b4cc56, 0x0, 0xc1e62788, 0x0, + 
0x420872c3, 0x0, 0x42dd4487, 0x0, 0xc2942017, 0x0, 0x426e7615, 0x0, + 0x42f8ee15); + + VSET(16, e32, m4); + // -55997.9824854041071376, 64501.1750668793683872, + // -29542.1742966430028901, -97235.1376669598394074, + // -76290.1568635256844573, -53719.7602741207738291, + // -71738.9507989753619768, 76087.4621994893532246, + // 88201.1680542646208778, -10676.6526112916035345, + // -87188.1793410585087258, 19855.3190320774738211, + // 17509.5892884960630909, 30185.4023848686192650, + // 54203.3669640090665780, -57884.3948619379953016 + VLOAD_64(v8, 0xc0eb57bf70853aee, 0x40ef7ea59a25db30, 0xc0dcd98b27ad1b60, + 0xc0f7bd3233e24524, 0xc0f2a0228283540c, 0xc0ea3af8542a6497, + 0xc0f183af3678fc84, 0x40f29377652b4ab0, 0x40f58892b059ab28, + 0xc0c4da5388c44d38, 0xc0f54942de94bad2, 0x40d363d46b0584c8, + 0x40d11965b6e718a0, 0x40dd7a59c0ac76c8, 0x40ea776bbe2b4e38, + 0xc0ec438ca2b580c4); + // -73536.78125000, 57454.64062500, -32693.95507812, + // -56205.09375000, 12513.70898438, 20858.82226562, + // 12284.94335938, 61625.35156250, -11893.36132812, + // -46430.30078125, 30247.85937500, -94111.64843750, + // 89016.30468750, -52090.74609375, 72764.65625000, + // -47109.86328125 + VLOAD_32(v16, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, + 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, + 0xc73805dd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwadd.wv v24, v8, v16, v0.t"); + // 0.0000000000000000, 121955.8156918793683872, + // 0.0000000000000000, -153440.2314169598394074, + // 0.0000000000000000, -32860.9380084957738291, + // 0.0000000000000000, 137712.8137619893532246, + // 0.0000000000000000, -57106.9533925416035345, + // 0.0000000000000000, -74256.3294054225261789, + // 0.0000000000000000, -21905.3437088813807350, + // 0.0000000000000000, -104994.2581431879953016 + VCMP_U64(12, v24, 0x0, 0x40fdc63d0d12ed98, 0x0, 0xc102bb01d9f12292, 0x0, + 0xc0e00b9e042a6497, 0x0, 
0x4100cf868295a558, 0x0, 0xc0ebe25e8231134e, + 0x0, 0xc0f22105453e9ece, 0x0, 0xc0d56455ff538938, 0x0, + 0xc0f9a224215ac062); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE7(void) { + VSET(16, e16, m2); + double dscalar_16; + // -16.64103889, 69.17821503, 38.24327850, + // -60.26666641, 97.95110321, -47.38455200, 94.12043762, + // -90.39623260, -17.02018356, 28.09982681, + // -85.91639709, 73.60102081, -98.61261749, + // -81.75864410, 40.07990265, 55.56723022 + VLOAD_32(v4, 0xc18520d9, 0x428a5b3f, 0x4218f91e, 0xc2711111, 0x42c3e6f7, + 0xc23d89c8, 0x42bc3daa, 0xc2b4cadf, 0xc1882956, 0x41e0cc72, + 0xc2abd532, 0x429333b9, 0xc2c539a9, 0xc2a3846d, 0x422051d2, + 0x425e44d8); + // 53.8750 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x52bc); + asm volatile("vfwadd.wf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 37.23396301, 123.05321503, 92.11827850, -6.39166641, + // 151.82611084, 6.49044800, 147.99543762, + // -36.52123260, 36.85481644, 81.97482300, -32.04139709, + // 127.47602081, -44.73761749, -27.88364410, 93.95490265, + // 109.44223022 + VCMP_U32(13, v8, 0x4214ef94, 0x42f61b3f, 0x42b83c8f, 0xc0cc8888, 0x4317d37c, + 0x40cfb1c0, 0x4313fed5, 0xc21215be, 0x42136b55, 0x42a3f31c, + 0xc2002a64, 0x42fef3b9, 0xc232f352, 0xc1df11b4, 0x42bbe8e9, + 0x42dae26c); + + VSET(16, e32, m4); + double dscalar_32; + // 366783.2934919928666204, -648147.5638866436202079, + // 24949.3815817765425891, -211759.8585660880198702, + // 337740.3714956413023174, -528559.3617047512670979, + // -863948.2704646114725620, -463848.5980863422155380, + // 958859.3069495267700404, -853775.5625991101842374, + // -7020.6864214694360271, 839278.6509590207133442, + // -443325.1460256360005587, 97289.3425237806513906, + // 220009.0786798361223191, 491284.4355713783297688 + VLOAD_64(v8, 0x411662fd2c892a3c, 0xc123c7a720b5c00a, 0x40d85d586bd5f8c0, + 0xc109d97ede57e5ac, 0x41149d317c695a78, 0xc120215eb9315d7b, + 0xc12a5d988a7a566a, 0xc11c4fa26470bf00, 0x412d43169d287d06, + 0xc12a0e1f200cfd96, 
0xc0bb6cafb9514280, 0x41299cdd4d4a8032, + 0xc11b0ef49587be8c, 0x40f7c0957afa3740, 0x410adb48a122e4d8, + 0x411dfc51be066c64); + // 572932.37500000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x490be046); + asm volatile("vfwadd.wf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 939715.6684919928666204, -75215.1888866436202079, + // 597881.7565817765425891, 361172.5164339119801298, + // 910672.7464956413023174, 44373.0132952487329021, + // -291015.8954646114725620, 109083.7769136577844620, + // 1531791.6819495267700404, -280843.1875991101842374, + // 565911.6885785305639729, 1412211.0259590207133442, + // 129607.2289743639994413, 670221.7175237806513906, + // 792941.4536798361223191, 1064216.8105713783297688 + VCMP_U64(14, v16, 0x412cad875644951e, 0xc0f25cf305ae0050, 0x41223ef3835eafc6, + 0x41160b5210d40d2a, 0x412bcaa17e34ad3c, 0x40e5aaa06cea2850, + 0xc111c31f94f4acd4, 0x40faa1bc6e3d0400, 0x41375f8fae943e83, + 0xc111242cc019fb2c, 0x4121452f608d5d7b, 0x41358c7306a54019, + 0x40ffa473a9e105d0, 0x4124741b6f5f46e8, 0x412832dae848b936, + 0x41303d18cf819b19); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE8(void) { + VSET(16, e16, m2); + double dscalar_16; + // -16.64103889, 69.17821503, 38.24327850, + // -60.26666641, 97.95110321, -47.38455200, 94.12043762, + // -90.39623260, -17.02018356, 28.09982681, + // -85.91639709, 73.60102081, -98.61261749, + // -81.75864410, 40.07990265, 55.56723022 + VLOAD_32(v4, 0xc18520d9, 0x428a5b3f, 0x4218f91e, 0xc2711111, 0x42c3e6f7, + 0xc23d89c8, 0x42bc3daa, 0xc2b4cadf, 0xc1882956, 0x41e0cc72, + 0xc2abd532, 0x429333b9, 0xc2c539a9, 0xc2a3846d, 0x422051d2, + 0x425e44d8); + // 53.8750 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x52bc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.wf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 123.05321503, 0.00000000, -6.39166641, + // 0.00000000, 6.49044800, 0.00000000, -36.52123260, + // 0.00000000, 81.97482300, 0.00000000, 127.47602081, + // 
0.00000000, -27.88364410, 0.00000000, 109.44223022 + VCMP_U32(15, v8, 0x0, 0x42f61b3f, 0x0, 0xc0cc8888, 0x0, 0x40cfb1c0, 0x0, + 0xc21215be, 0x0, 0x42a3f31c, 0x0, 0x42fef3b9, 0x0, 0xc1df11b4, 0x0, + 0x42dae26c); + + VSET(16, e32, m4); + double dscalar_32; + // 366783.2934919928666204, -648147.5638866436202079, + // 24949.3815817765425891, -211759.8585660880198702, + // 337740.3714956413023174, -528559.3617047512670979, + // -863948.2704646114725620, -463848.5980863422155380, + // 958859.3069495267700404, -853775.5625991101842374, + // -7020.6864214694360271, 839278.6509590207133442, + // -443325.1460256360005587, 97289.3425237806513906, + // 220009.0786798361223191, 491284.4355713783297688 + VLOAD_64(v8, 0x411662fd2c892a3c, 0xc123c7a720b5c00a, 0x40d85d586bd5f8c0, + 0xc109d97ede57e5ac, 0x41149d317c695a78, 0xc120215eb9315d7b, + 0xc12a5d988a7a566a, 0xc11c4fa26470bf00, 0x412d43169d287d06, + 0xc12a0e1f200cfd96, 0xc0bb6cafb9514280, 0x41299cdd4d4a8032, + 0xc11b0ef49587be8c, 0x40f7c0957afa3740, 0x410adb48a122e4d8, + 0x411dfc51be066c64); + // 572932.37500000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x490be046); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.wf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, -75215.1888866436202079, + // 0.0000000000000000, 361172.5164339119801298, + // 0.0000000000000000, 44373.0132952487329021, + // 0.0000000000000000, 109083.7769136577844620, + // 0.0000000000000000, -280843.1875991101842374, + // 0.0000000000000000, 1412211.0259590207133442, + // 0.0000000000000000, 670221.7175237806513906, + // 0.0000000000000000, 1064216.8105713783297688 + VCMP_U64(16, v16, 0x0, 0xc0f25cf305ae0050, 0x0, 0x41160b5210d40d2a, 0x0, + 0x40e5aaa06cea2850, 0x0, 0x40faa1bc6e3d0400, 0x0, 0xc111242cc019fb2c, + 0x0, 0x41358c7306a54019, 0x0, 0x4124741b6f5f46e8, 0x0, + 0x41303d18cf819b19); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + 
TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c new file mode 100644 index 000000000..195759577 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c @@ -0,0 +1,670 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// We assume RNE rounding when not specified by the encoding + +///////////////// +// vfwcvt.xu.f // +///////////////// + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 56.438, -30.938, -68.438, -32.969, 56.438, + // -5.816, 53.094, -29.875, -93.562, -90.750, -65.875, + // -91.062, 16.281, -77.938, -67.000, -51.844 + VLOAD_16(v4, 0x530e, 0xcfbc, 0xd447, 0xd01f, 0x530e, 0xc5d1, 0x52a3, 0xcf78, + 0xd5d9, 0xd5ac, 0xd41e, 0xd5b1, 0x4c12, 0xd4df, 0xd430, 0xd27b); + asm volatile("vfwcvt.xu.f.v v8, v4"); + // 56, 0, 0, 0, 56, 0, 53, + // 0, 0, 0, 0, 0, 16, 0, + // 0, 0 + VCMP_U32(1, v8, 0x00000038, 0x00000000, 0x00000000, 0x00000000, 0x00000038, + 0x00000000, 0x00000035, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000010, 0x00000000, 0x00000000, + 0x00000000); + + VSET(16, e32, m4); + // -54444.973, 43481.863, 88447.461, 32690.551, + // -37979.809, 68218.094, -43036.512, -38011.395, + // -36599.363, 48418.234, 81414.820, 16330.853, + // 75606.320, -85030.219, 13033.059, 7375.421 + VLOAD_32(v8, 0xc754acf9, 0x4729d9dd, 0x47acbfbb, 0x46ff651a, 0xc7145bcf, + 0x47853d0c, 0xc7281c83, 0xc7147b65, 0xc70ef75d, 0x473d223c, + 0x479f0369, 0x467f2b69, 0x4793ab29, 0xc7a6131c, 0x464ba43c, + 0x45e67b5f); + asm volatile("vfwcvt.xu.f.v v16, v8"); + // 0, 43482, 88447, 32691, + // 0, 68218, 0, 0, 0, + // 48418, 81415, 16331, + // 75606, 0, 13033, + // 7375 + 
VCMP_U64(2, v16, 0x0000000000000000, 0x000000000000a9da, 0x000000000001597f, + 0x0000000000007fb3, 0x0000000000000000, 0x0000000000010a7a, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x000000000000bd22, 0x0000000000013e07, 0x0000000000003fcb, + 0x0000000000012756, 0x0000000000000000, 0x00000000000032e9, + 0x0000000000001ccf); +}; + +// Simple random test with similar values (masked) +// Same numbers as TEST_CASE1, except for the first two e16 inputs +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -36.375, 56.438, -68.438, -32.969, 56.438, + // -5.816, 53.094, -29.875, -93.562, -90.750, -65.875, + // -91.062, 16.281, -77.938, -67.000, -51.844 + VLOAD_16(v4, 0xd08c, 0x530e, 0xd447, 0xd01f, 0x530e, 0xc5d1, 0x52a3, 0xcf78, + 0xd5d9, 0xd5ac, 0xd41e, 0xd5b1, 0x4c12, 0xd4df, 0xd430, 0xd27b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.xu.f.v v8, v4, v0.t"); + // 0, 56, 0, 0, 0, 0, 0, 0, + // 0, 0, 0, 0, 0, 0, 0, 0 + VCMP_U32(3, v8, 0x00000000, 0x00000038, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000); + + VSET(16, e32, m4); + // -54444.973, 43481.863, 88447.461, 32690.551, -37979.809, + // 68218.094, -43036.512, -38011.395, -36599.363, 48418.234, + // 81414.820, 16330.853, 75606.320, -85030.219, 13033.059, + // 7375.421 + VLOAD_32(v8, 0xc754acf9, 0x4729d9dd, 0x47acbfbb, 0x46ff651a, 0xc7145bcf, + 0x47853d0c, 0xc7281c83, 0xc7147b65, 0xc70ef75d, 0x473d223c, + 0x479f0369, 0x467f2b69, 0x4793ab29, 0xc7a6131c, 0x464ba43c, + 0x45e67b5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.xu.f.v v16, v8, v0.t"); + // 0, 43482, 0, 32691, 0, + // 68218, 0, 0, 0, + // 48418, 0, 16331, 0, + // 0, 0, 7375 + VCMP_U64(4, v16, 0x0000000000000000, 0x000000000000a9da, 0x0000000000000000, + 0x0000000000007fb3, 0x0000000000000000, 0x0000000000010a7a, + 0x0000000000000000, 0x0000000000000000, 
0x0000000000000000, + 0x000000000000bd22, 0x0000000000000000, 0x0000000000003fcb, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000001ccf); +}; + +//////////////// +// vfwcvt.x.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -55.656, -23.391, 53.094, -0.356, 26.859, -81.938, + // 63.625, -54.594, -36.375, 77.312, 73.188, -79.500, + // -22.047, -30.500, 33.375, -26.281 + VLOAD_16(v4, 0xd2f5, 0xcdd9, 0x52a3, 0xb5b2, 0x4eb7, 0xd51f, 0x53f4, 0xd2d3, + 0xd08c, 0x54d5, 0x5493, 0xd4f8, 0xcd83, 0xcfa0, 0x502c, 0xce92); + asm volatile("vfwcvt.x.f.v v8, v4"); + // -56, -23, 53, 0, 27, -82, + // 64, -55, -36, 77, + // 73, -80, -22, -30, + // 33, -26 + VCMP_U32(5, v8, 0xffffffc8, 0xffffffe9, 0x00000035, 0x00000000, 0x0000001b, + 0xffffffae, 0x00000040, 0xffffffc9, 0xffffffdc, 0x0000004d, + 0x00000049, 0xffffffb0, 0xffffffea, 0xffffffe2, 0x00000021, + 0xffffffe6); + + VSET(16, e32, m4); + // -22345.104, -55208.160, 60155.754, -4924.268, + // -42337.285, -60609.004, 51795.328, 33876.547, + // -99812.922, 59419.867, -78706.844, 72266.555, + // -70664.008, -83501.727, -15981.749, -2004.535 + VLOAD_32(v8, 0xc6ae9235, 0xc757a829, 0x476afbc1, 0xc599e225, 0xc7256149, + 0xc76cc101, 0x474a5354, 0x4704548c, 0xc7c2f276, 0x47681bde, + 0xc799b96c, 0x478d2547, 0xc78a0401, 0xc7a316dd, 0xc679b6ff, + 0xc4fa9120); + asm volatile("vfwcvt.x.f.v v16, v8"); + // -22345, -55208, 60156, -4924, + // -42337, -60609, 51795, 33877, + // -99813, 59420, -78707, 72267, + // -70664, -83502, -15982, -2005 + VCMP_U64(6, v16, 0xffffffffffffa8b7, 0xffffffffffff2858, 0x000000000000eafc, + 0xffffffffffffecc4, 0xffffffffffff5a9f, 0xffffffffffff133f, + 0x000000000000ca53, 0x0000000000008455, 0xfffffffffffe7a1b, + 0x000000000000e81c, 0xfffffffffffecc8d, 0x0000000000011a4b, + 0xfffffffffffeebf8, 0xfffffffffffeb9d2, 0xffffffffffffc192, + 0xfffffffffffff82b); +}; + +// Simple random test with similar values (masked) +// The 
numbers are the same as in TEST_CASE3 +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -55.656, -23.391, 53.094, -0.356, 26.859, + // -81.938, 63.625, -54.594, -36.375, 77.312, 73.188, + // -79.500, -22.047, -30.500, 33.375, -26.281 + VLOAD_16(v4, 0xd2f5, 0xcdd9, 0x52a3, 0xb5b2, 0x4eb7, 0xd51f, 0x53f4, 0xd2d3, + 0xd08c, 0x54d5, 0x5493, 0xd4f8, 0xcd83, 0xcfa0, 0x502c, 0xce92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.x.f.v v8, v4, v0.t"); + // 0, -23, 0, 0, 0, -82, 0, + // -55, 0, 77, 0, -80, 0, + // -30, 0, -26 + VCMP_U32(7, v8, 0x00000000, 0xffffffe9, 0x00000000, 0x00000000, 0x00000000, + 0xffffffae, 0x00000000, 0xffffffc9, 0x00000000, 0x0000004d, + 0x00000000, 0xffffffb0, 0x00000000, 0xffffffe2, 0x00000000, + 0xffffffe6); + + VSET(16, e32, m4); + // -22345.104, -55208.160, 60155.754, -4924.268, + // -42337.285, -60609.004, 51795.328, 33876.547, + // -99812.922, 59419.867, -78706.844, 72266.555, + // -70664.008, -83501.727, -15981.749, -2004.535 + VLOAD_32(v8, 0xc6ae9235, 0xc757a829, 0x476afbc1, 0xc599e225, 0xc7256149, + 0xc76cc101, 0x474a5354, 0x4704548c, 0xc7c2f276, 0x47681bde, + 0xc799b96c, 0x478d2547, 0xc78a0401, 0xc7a316dd, 0xc679b6ff, + 0xc4fa9120); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.x.f.v v16, v8, v0.t"); + // 0, -55208, 0, -4924, 0, + // -60609, 0, 33877, 0, + // 59420, 0, 72267, 0, + // -83502, 0, -2005 + VCMP_U64(8, v16, 0x0000000000000000, 0xffffffffffff2858, 0x0000000000000000, + 0xffffffffffffecc4, 0x0000000000000000, 0xffffffffffff133f, + 0x0000000000000000, 0x0000000000008455, 0x0000000000000000, + 0x000000000000e81c, 0x0000000000000000, 0x0000000000011a4b, + 0x0000000000000000, 0xfffffffffffeb9d2, 0x0000000000000000, + 0xfffffffffffff82b); +}; + +///////////////////// +// vfwcvt.rtz.xu.f // +///////////////////// + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // 26304.000, -31056.000, 6932.000, 63168.000, -10920.000, + // 
-38528.000, inf, -inf, -1313.000, 52736.000, inf, + // -inf, -61024.000, -inf, -5672.000, 53824.000 + VLOAD_16(v4, 0x766c, 0xf795, 0x6ec5, 0x7bb6, 0xf155, 0xf8b4, 0x7c00, 0xfc00, + 0xe521, 0x7a70, 0x7c00, 0xfc00, 0xfb73, 0xfc00, 0xed8a, 0x7a92); + asm volatile("vfwcvt.rtz.xu.f.v v8, v4"); + // 26304, 0, 6932, 63168, 0, 0, + // 4294967295 (+inf), 0, 0, 52736, 4294967295 (+inf), + // 0, 0, 0, 0, + // 53824 + VCMP_U32(9, v8, 0x000066c0, 0x00000000, 0x00001b14, 0x0000f6c0, 0x00000000, + 0x00000000, 0xffffffff, 0x00000000, 0x00000000, 0x0000ce00, + 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000d240); + + VSET(16, e32, m4); + // -139027333120.000, 783549530112.000, 166903955456.000, + // -848099999744.000, -671032279040.000, -402660294656.000, + // -259808854016.000, 248555126784.000, 712853684224.000, + // -492155797504.000, -448682098688.000, -916605566976.000, + // 67602378752.000, 519669350400.000, 569111478272.000, + // -920229773312.000 + VLOAD_32(v8, 0xd2017ab3, 0x53366f31, 0x521b7101, 0xd34576b3, 0xd31c3ca4, + 0xd2bb80d9, 0xd271f742, 0x52677c29, 0x5325f964, 0xd2e52d8b, + 0xd2d0ef13, 0xd35569f3, 0x517bd6a7, 0x52f1fd6a, 0x530481b0, + 0xd35641f8); + asm volatile("vfwcvt.rtz.xu.f.v v16, v8"); + // 0, 783549530112, 166903955456, + // 0, 0, 0, 0, + // 248555126784, 712853684224, 0, + // 0, 0, 67602378752, + // 519669350400, 569111478272, 0 + VCMP_U64(10, v16, 0x0000000000000000, 0x000000b66f310000, 0x00000026dc404000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000039df0a4000, 0x000000a5f9640000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000fbd6a7000, 0x00000078feb50000, 0x0000008481b00000, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 26304.000, -31056.000, 6932.000, 63168.000, -10920.000, + // -38528.000, inf, -inf, -1313.000, 52736.000, inf, + // -inf, -61024.000, -inf, 
-5672.000, 53824.000 + VLOAD_16(v4, 0x766c, 0xf795, 0x6ec5, 0x7bb6, 0xf155, 0xf8b4, 0x7c00, 0xfc00, + 0xe521, 0x7a70, 0x7c00, 0xfc00, 0xfb73, 0xfc00, 0xed8a, 0x7a92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.rtz.xu.f.v v8, v4, v0.t"); + // 0, 0, 0, 63168, 0, 0, + // 0, 0, 0, 52736, 0, 0, + // 0, 0, 0, 53824 + VCMP_U32(11, v8, 0x00000000, 0x00000000, 0x00000000, 0x0000f6c0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ce00, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000d240); + + VSET(16, e32, m4); + // -139027333120.000, 783549530112.000, 166903955456.000, + // -848099999744.000, -671032279040.000, -402660294656.000, + // -259808854016.000, 248555126784.000, 712853684224.000, + // -492155797504.000, -448682098688.000, -916605566976.000, + // 67602378752.000, 519669350400.000, 569111478272.000, + // -920229773312.000 + VLOAD_32(v8, 0xd2017ab3, 0x53366f31, 0x521b7101, 0xd34576b3, 0xd31c3ca4, + 0xd2bb80d9, 0xd271f742, 0x52677c29, 0x5325f964, 0xd2e52d8b, + 0xd2d0ef13, 0xd35569f3, 0x517bd6a7, 0x52f1fd6a, 0x530481b0, + 0xd35641f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.rtz.xu.f.v v16, v8, v0.t"); + // 0, 783549530112, 0, 0, 0, 0, 0, + // 248555126784, 0, 0, + // 0, 0, 0, + // 519669350400, 0, 0 + VCMP_U64(12, v16, 0x0000000000000000, 0x000000b66f310000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000039df0a4000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000078feb50000, 0x0000000000000000, + 0x0000000000000000); +}; + +//////////////////// +// vfwcvt.rtz.x.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 5.844, 36.219, -86.250, 20.406, -45.688, 13.961, + // -96.562, 81.000, -32.594, 51.281, 80.750, + // -17.750, 14.516, 58.000, 69.938, -94.688 + 
VLOAD_16(v4, 0x45d8, 0x5087, 0xd564, 0x4d1a, 0xd1b6, 0x4afb, 0xd609, 0x5510, + 0xd013, 0x5269, 0x550c, 0xcc70, 0x4b42, 0x5340, 0x545f, 0xd5eb); + asm volatile("vfwcvt.rtz.x.f.v v8, v4"); + // 5, 36, -86, 20, -45, 13, + // -96, 81, -32, 51, 80, -17, + // 14, 58, 69, -94 + VCMP_U32(13, v8, 0x00000005, 0x00000024, 0xffffffaa, 0x00000014, 0xffffffd3, + 0x0000000d, 0xffffffa0, 0x00000051, 0xffffffe0, 0x00000033, + 0x00000050, 0xffffffef, 0x0000000e, 0x0000003a, 0x00000045, + 0xffffffa2); + + VSET(16, e32, m4); + // 2116.345, -810274979840.000, -5833.340, -6088.383, + // -9260.508, -2389.850, 9361.639, 5574.592, -6825.026, + // 2473.934, -6756.971, -7155.075, 2251.162, -2899.548, + // -3184.759, -1954.714 + VLOAD_32(v8, 0x45044584, 0xd33ca827, 0xc5b64ab9, 0xc5be4311, 0xc610b208, + 0xc5155d98, 0x4612468e, 0x45ae34bd, 0xc5d54835, 0x451a9ef1, + 0xc5d327c5, 0xc5df9899, 0x450cb297, 0xc53538c6, 0xc5470c23, + 0xc4f456dc); + asm volatile("vfwcvt.rtz.x.f.v v16, v8"); + // 2116, -810274979840, -5833, -6088, + // -9260, -2389, 9361, 5574, + // -6825, 2473, -6756, -7155, + // 2251, -2899, -3184, -1954 + VCMP_U64(14, v16, 0x0000000000000844, 0xffffff4357d90000, 0xffffffffffffe937, + 0xffffffffffffe838, 0xffffffffffffdbd4, 0xfffffffffffff6ab, + 0x0000000000002491, 0x00000000000015c6, 0xffffffffffffe557, + 0x00000000000009a9, 0xffffffffffffe59c, 0xffffffffffffe40d, + 0x00000000000008cb, 0xfffffffffffff4ad, 0xfffffffffffff390, + 0xfffffffffffff85e); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE7 (only the second e32 input differs) +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 5.844, 36.219, -86.250, 20.406, -45.688, 13.961, + // -96.562, 81.000, -32.594, 51.281, 80.750, + // -17.750, 14.516, 58.000, 69.938, -94.688 + VLOAD_16(v4, 0x45d8, 0x5087, 0xd564, 0x4d1a, 0xd1b6, 0x4afb, 0xd609, 0x5510, + 0xd013, 0x5269, 0x550c, 0xcc70, 0x4b42, 0x5340, 0x545f, 0xd5eb); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.rtz.x.f.v v8, v4, v0.t"); + 
// 0, 36, 0, 20, 0, 13, 0, + // 81, 0, 51, 0, -17, 0, + // 58, 0, -94 + VCMP_U32(15, v8, 0x00000000, 0x00000024, 0x00000000, 0x00000014, 0x00000000, + 0x0000000d, 0x00000000, 0x00000051, 0x00000000, 0x00000033, + 0x00000000, 0xffffffef, 0x00000000, 0x0000003a, 0x00000000, + 0xffffffa2); + + VSET(16, e32, m4); + // 2116.345, -6652.860, -5833.340, -6088.383, -9260.508, + // -2389.850, 9361.639, 5574.592, -6825.026, 2473.934, + // -6756.971, -7155.075, 2251.162, -2899.548, -3184.759, + // -1954.714 + VLOAD_32(v8, 0x45044584, 0xc5cfe6e1, 0xc5b64ab9, 0xc5be4311, 0xc610b208, + 0xc5155d98, 0x4612468e, 0x45ae34bd, 0xc5d54835, 0x451a9ef1, + 0xc5d327c5, 0xc5df9899, 0x450cb297, 0xc53538c6, 0xc5470c23, + 0xc4f456dc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.rtz.x.f.v v16, v8, v0.t"); + // 0, -6652, 0, -6088, 0, + // -2389, 0, 5574, 0, + // 2473, 0, -7155, 0, + // -2899, 0, -1954 + VCMP_U64(16, v16, 0x0000000000000000, 0xffffffffffffe604, 0x0000000000000000, + 0xffffffffffffe838, 0x0000000000000000, 0xfffffffffffff6ab, + 0x0000000000000000, 0x00000000000015c6, 0x0000000000000000, + 0x00000000000009a9, 0x0000000000000000, 0xffffffffffffe40d, + 0x0000000000000000, 0xfffffffffffff4ad, 0x0000000000000000, + 0xfffffffffffff85e); +}; + +///////////////// +// vfwcvt.f.xu // +///////////////// + +// Simple random test with similar values +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 64656, 64687, 64823, 970, 543, + // 65038, 65122, 966, 180, 389, 337, + // 341, 65240, 51, 64922, 64676 + VLOAD_16(v4, 0xfc90, 0xfcaf, 0xfd37, 0x03ca, 0x021f, 0xfe0e, 0xfe62, 0x03c6, + 0x00b4, 0x0185, 0x0151, 0x0155, 0xfed8, 0x0033, 0xfd9a, 0xfca4); + asm volatile("vfwcvt.f.xu.v v8, v4"); + // 64656.000, 64687.000, 64823.000, 970.000, + // 543.000, 65038.000, 65122.000, 966.000, + // 180.000, 389.000, 337.000, 341.000, + // 65240.000, 51.000, 64922.000, 64676.000 + VCMP_U32(17, v8, 0x477c9000, 0x477caf00, 0x477d3700, 0x44728000, 0x4407c000, + 0x477e0e00, 
0x477e6200, 0x44718000, 0x43340000, 0x43c28000, + 0x43a88000, 0x43aa8000, 0x477ed800, 0x424c0000, 0x477d9a00, + 0x477ca400); + + VSET(16, e32, m4); + // 97144, 4294936082, 42555, + // 4294893205, 55337, 4294948570, + // 4294931792, 4294924170, 4294912208, + // 4294947132, 4294903099, 4294944521, + // 4294923920, 4294889958, 31133, 30359 + VLOAD_32(v8, 0x00017b78, 0xffff8612, 0x0000a63b, 0xfffede95, 0x0000d829, + 0xffffb6da, 0xffff7550, 0xffff578a, 0xffff28d0, 0xffffb13c, + 0xffff053b, 0xffffa709, 0xffff5690, 0xfffed1e6, 0x0000799d, + 0x00007697); + asm volatile("vfwcvt.f.xu.v v16, v8"); + // 97144.000, 4294936082.000, 42555.000, + // 4294893205.000, 55337.000, 4294948570.000, + // 4294931792.000, 4294924170.000, 4294912208.000, + // 4294947132.000, 4294903099.000, 4294944521.000, + // 4294923920.000, 4294889958.000, 31133.000, + // 30359.000 + VCMP_U64(18, v16, 0x40f7b78000000000, 0x41effff0c2400000, 0x40e4c76000000000, + 0x41efffdbd2a00000, 0x40eb052000000000, 0x41effff6db400000, + 0x41efffeeaa000000, 0x41efffeaf1400000, 0x41efffe51a000000, + 0x41effff627800000, 0x41efffe0a7600000, 0x41effff4e1200000, + 0x41efffead2000000, 0x41efffda3cc00000, 0x40de674000000000, + 0x40dda5c000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE9 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 64656, 64687, 64823, 970, 543, + // 65038, 65122, 966, 180, 389, 337, + // 341, 65240, 51, 64922, 64676 + VLOAD_16(v4, 0xfc90, 0xfcaf, 0xfd37, 0x03ca, 0x021f, 0xfe0e, 0xfe62, 0x03c6, + 0x00b4, 0x0185, 0x0151, 0x0155, 0xfed8, 0x0033, 0xfd9a, 0xfca4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.xu.v v8, v4, v0.t"); + // 0.000, 64687.000, 0.000, 970.000, 0.000, + // 65038.000, 0.000, 966.000, 0.000, 389.000, + // 0.000, 341.000, 0.000, 51.000, 0.000, + // 64676.000 + VCMP_U32(19, v8, 0x0, 0x477caf00, 0x0, 0x44728000, 0x0, 0x477e0e00, 0x0, + 0x44718000, 0x0, 0x43c28000, 0x0, 0x43aa8000, 0x0, 
0x424c0000, 0x0, + 0x477ca400); + + VSET(16, e32, m4); + // 97144, 4294936082, 42555, + // 4294893205, 55337, 4294948570, + // 4294931792, 4294924170, 4294912208, + // 4294947132, 4294903099, 4294944521, + // 4294923920, 4294889958, 31133, 30359 + VLOAD_32(v8, 0x00017b78, 0xffff8612, 0x0000a63b, 0xfffede95, 0x0000d829, + 0xffffb6da, 0xffff7550, 0xffff578a, 0xffff28d0, 0xffffb13c, + 0xffff053b, 0xffffa709, 0xffff5690, 0xfffed1e6, 0x0000799d, + 0x00007697); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.xu.v v16, v8, v0.t"); + // 0.000, 4294936082.000, 0.000, 4294893205.000, + // 0.000, 4294948570.000, 0.000, 4294924170.000, + // 0.000, 4294947132.000, 0.000, 4294944521.000, + // 0.000, 4294889958.000, 0.000, 30359.000 + VCMP_U64(20, v16, 0x0, 0x41effff0c2400000, 0x0, 0x41efffdbd2a00000, 0x0, + 0x41effff6db400000, 0x0, 0x41efffeaf1400000, 0x0, 0x41effff627800000, + 0x0, 0x41effff4e1200000, 0x0, 0x41efffda3cc00000, 0x0, + 0x40dda5c000000000); +}; + +//////////////// +// vfwcvt.f.x // +//////////////// + +// Simple random test with similar values +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -263, -943, 111, -140, -792, + // -320, -384, 250, -308, 578, -830, + // -865, 908, 264, 93, 833 + VLOAD_16(v4, 0xfef9, 0xfc51, 0x006f, 0xff74, 0xfce8, 0xfec0, 0xfe80, 0x00fa, + 0xfecc, 0x0242, 0xfcc2, 0xfc9f, 0x038c, 0x0108, 0x005d, 0x0341); + asm volatile("vfwcvt.f.x.v v8, v4"); + // -263.000, -943.000, 111.000, -140.000, + // -792.000, -320.000, -384.000, 250.000, + // -308.000, 578.000, -830.000, -865.000, + // 908.000, 264.000, 93.000, 833.000 + VCMP_U32(21, v8, 0xc3838000, 0xc46bc000, 0x42de0000, 0xc30c0000, 0xc4460000, + 0xc3a00000, 0xc3c00000, 0x437a0000, 0xc39a0000, 0x44108000, + 0xc44f8000, 0xc4584000, 0x44630000, 0x43840000, 0x42ba0000, + 0x44504000); + + VSET(16, e32, m4); + // -85277, 33391, 84804, + // -45155, -68903, 19141, + // -10026, 87992, 13128, + // 95737, -70832, 43360, + // 32471, 51, 50027, + // -57346 + VLOAD_32(v8, 
0xfffeb2e3, 0x0000826f, 0x00014b44, 0xffff4f9d, 0xfffef2d9, + 0x00004ac5, 0xffffd8d6, 0x000157b8, 0x00003348, 0x000175f9, + 0xfffeeb50, 0x0000a960, 0x00007ed7, 0x00000033, 0x0000c36b, + 0xffff1ffe); + asm volatile("vfwcvt.f.x.v v16, v8"); + // -85277.000, 33391.000, 84804.000, -45155.000, + // -68903.000, 19141.000, -10026.000, 87992.000, + // 13128.000, 95737.000, -70832.000, 43360.000, + // 32471.000, 51.000, 50027.000, -57346.000 + VCMP_U64(22, v16, 0xc0f4d1d000000000, 0x40e04de000000000, 0x40f4b44000000000, + 0xc0e60c6000000000, 0xc0f0d27000000000, 0x40d2b14000000000, + 0xc0c3950000000000, 0x40f57b8000000000, 0x40c9a40000000000, + 0x40f75f9000000000, 0xc0f14b0000000000, 0x40e52c0000000000, + 0x40dfb5c000000000, 0x4049800000000000, 0x40e86d6000000000, + 0xc0ec004000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE11 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -263, -943, 111, -140, -792, + // -320, -384, 250, -308, 578, -830, + // -865, 908, 264, 93, 833 + VLOAD_16(v4, 0xfef9, 0xfc51, 0x006f, 0xff74, 0xfce8, 0xfec0, 0xfe80, 0x00fa, + 0xfecc, 0x0242, 0xfcc2, 0xfc9f, 0x038c, 0x0108, 0x005d, 0x0341); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.x.v v8, v4, v0.t"); + // 0.000, -943.000, 0.000, -140.000, 0.000, + // -320.000, 0.000, 250.000, 0.000, 578.000, + // 0.000, -865.000, 0.000, 264.000, 0.000, + // 833.000 + VCMP_U32(23, v8, 0x0, 0xc46bc000, 0x0, 0xc30c0000, 0x0, 0xc3a00000, 0x0, + 0x437a0000, 0x0, 0x44108000, 0x0, 0xc4584000, 0x0, 0x43840000, 0x0, + 0x44504000); + + VSET(16, e32, m4); + // -85277, 33391, 84804, + // -45155, -68903, 19141, + // -10026, 87992, 13128, + // 95737, -70832, 43360, + // 32471, 51, 50027, + // -57346 + VLOAD_32(v8, 0xfffeb2e3, 0x0000826f, 0x00014b44, 0xffff4f9d, 0xfffef2d9, + 0x00004ac5, 0xffffd8d6, 0x000157b8, 0x00003348, 0x000175f9, + 0xfffeeb50, 0x0000a960, 0x00007ed7, 0x00000033, 0x0000c36b, + 0xffff1ffe); + VLOAD_8(v0, 0xAA, 
0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.x.v v16, v8, v0.t"); + // 0.000, 33391.000, 0.000, -45155.000, 0.000, + // 19141.000, 0.000, 87992.000, 0.000, + // 95737.000, 0.000, 43360.000, + // 0.000, 51.000, 0.000, -57346.000 + VCMP_U64(24, v16, 0x0, 0x40e04de000000000, 0x0, 0xc0e60c6000000000, 0x0, + 0x40d2b14000000000, 0x0, 0x40f57b8000000000, 0x0, 0x40f75f9000000000, + 0x0, 0x40e52c0000000000, 0x0, 0x4049800000000000, 0x0, + 0xc0ec004000000000); +}; + +//////////////// +// vfwcvt.f.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE13(void) { + VSET(16, e16, m2); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VLOAD_16(v4, 0x5535, 0xd533, 0x53cf, 0x55ed, 0x4966, 0xca89, 0xc885, 0x52c8, + 0xd5c3, 0x53f6, 0xd01c, 0xd51b, 0xd3c7, 0xd4e4, 0xcf6d, 0xbc8c); + asm volatile("vfwcvt.f.f.v v8, v4"); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VCMP_U32(25, v8, 0x42a6a000, 0xc2a66000, 0x4279e000, 0x42bda000, 0x412cc000, + 0xc1512000, 0xc110a000, 0x42590000, 0xc2b86000, 0x427ec000, + 0xc2038000, 0xc2a36000, 0xc278e000, 0xc29c8000, 0xc1eda000, + 0xbf918000); + + VSET(16, e32, m4); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 4639.214 + VLOAD_32(v8, 0xc7875023, 0xc6c06394, 0x47665a41, 0x47628284, 0x4704ef04, + 0x47721957, 0xc7a5221b, 0x47af1d20, 0x459af7bc, 0xc7905bc1, + 0xc7a32318, 0xc769a6c0, 0x478bbc5f, 0x47b66cb1, 0x479aeb8a, + 0x4590f9b7); + asm volatile("vfwcvt.f.f.v v16, v8"); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 
4639.214 + VCMP_U64(26, v16, 0xc0f0ea0460000000, 0xc0d80c7280000000, 0x40eccb4820000000, + 0x40ec505080000000, 0x40e09de080000000, 0x40ee432ae0000000, + 0xc0f4a44360000000, 0x40f5e3a400000000, 0x40b35ef780000000, + 0xc0f20b7820000000, 0xc0f4646300000000, 0xc0ed34d800000000, + 0x40f1778be0000000, 0x40f6cd9620000000, 0x40f35d7140000000, + 0x40b21f36e0000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE13 +void TEST_CASE14(void) { + VSET(16, e16, m2); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VLOAD_16(v4, 0x5535, 0xd533, 0x53cf, 0x55ed, 0x4966, 0xca89, 0xc885, 0x52c8, + 0xd5c3, 0x53f6, 0xd01c, 0xd51b, 0xd3c7, 0xd4e4, 0xcf6d, 0xbc8c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.f.v v8, v4, v0.t"); + // 0.000, -83.188, 0.000, 94.812, 0.000, -13.070, 0.000, + // 54.250, 0.000, 63.688, 0.000, -81.688, 0.000, + // -78.250, 0.000, -1.137 + VCMP_U32(27, v8, 0x0, 0xc2a66000, 0x0, 0x42bda000, 0x0, 0xc1512000, 0x0, + 0x42590000, 0x0, 0x427ec000, 0x0, 0xc2a36000, 0x0, 0xc29c8000, 0x0, + 0xbf918000); + + VSET(16, e32, m4); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 4639.214 + VLOAD_32(v8, 0xc7875023, 0xc6c06394, 0x47665a41, 0x47628284, 0x4704ef04, + 0x47721957, 0xc7a5221b, 0x47af1d20, 0x459af7bc, 0xc7905bc1, + 0xc7a32318, 0xc769a6c0, 0x478bbc5f, 0x47b66cb1, 0x479aeb8a, + 0x4590f9b7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.f.v v16, v8, v0.t"); + // 0.000, -24625.789, 0.000, 57986.516, 0.000, 61977.340, + // 0.000, 89658.250, 0.000, -73911.508, 0.000, + // -59814.750, 0.000, 93401.383, 0.000, 4639.214 + VCMP_U64(28, v16, 0x0, 0xc0d80c7280000000, 0x0, 0x40ec505080000000, 0x0, + 0x40ee432ae0000000, 
0x0, 0x40f5e3a400000000, 0x0, 0xc0f20b7820000000, + 0x0, 0xc0ed34d800000000, 0x0, 0x40f6cd9620000000, 0x0, + 0x40b21f36e0000000); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + TEST_CASE13(); + TEST_CASE14(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c new file mode 100644 index 000000000..4f1e354f6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c @@ -0,0 +1,351 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, + // -26.4688, -48.1250, -32.9688, 7.8750, + // -38.4375, 51.5625, 46.2188, -75.1875, 23.5625, -77.8125 + VLOAD_16(v4, 0x5299, 0x54a3, 0x35b4, 0x5618, 0x47d9, 0x52a7, 0xce9e, 0xd204, + 0xd01f, 0x47e0, 0xd0ce, 0x5272, 0x51c7, 0xd4b3, 0x4de4, 0xd4dd); + // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, + // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, + // -54.6875, 61.7500, 38.3438, 95.8125, 10.0938 + VLOAD_16(v12, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); + // -83.87223053, -48.34465408, 70.48658752, -1.26614821, + // -24.13150024, -65.13838196, + // 0.84671319, 34.34510040, 72.80049896, + // -86.23424530, 25.52654839, -68.44364929, 9.81109142, + // -85.20966339, -81.00300598, 16.25512505 + VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 
0xbfa21125, 0xc1c10d50, + 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, + 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, + 0x41820a7f); + asm volatile("vfwmacc.vv v8, v4, v12"); + // 2538.69604492, 2068.31738281, 75.29024506, -8831.11035156, + // 431.03256226, -3461.15991211, -1297.77636719, 18.34259796, + // -945.62481689, 2.17493439, 3636.24926758, -2888.26782227, + // 2863.81884766, -2968.18041992, 2176.57910156, -769.16479492 + VCMP_U32(1, v8, 0x451eab23, 0x45014514, 0x4296949b, 0xc609fc71, 0x43d7842b, + 0xc558528f, 0xc4a238d8, 0x4192bda4, 0xc46c67fd, 0x400b3220, + 0x456343fd, 0xc5348449, 0x4532fd1a, 0xc53982e3, 0x45080944, + 0xc4404a8c); + + VSET(16, e32, m4); + // -3306.98510742, -33314.88281250, 64578.31250000, + // 11648.08203125, -92704.16406250, 33998.11328125, + // 23406.90429688, 44169.36718750, -1206.53601074, + // 4568.00048828, -89687.13281250, 47865.25781250, + // -72205.21875000, 40772.06640625, 95904.72656250, + // 96043.19531250 + VLOAD_32(v8, 0xc54eafc3, 0xc70222e2, 0x477c4250, 0x46360054, 0xc7b51015, + 0x4704ce1d, 0x46b6ddcf, 0x472c895e, 0xc496d127, 0x458ec001, + 0xc7af2b91, 0x473af942, 0xc78d069c, 0x471f4411, 0x47bb505d, + 0x47bb9599); + // -52385.05468750, -31301.09960938, 1862.59667969, + // 86344.56250000, 9560.06835938, -93766.92187500, + // -68756.87500000, 42627.23046875, -89604.89062500, + // -47420.98437500, -40235.07421875, 44342.39453125, + // 90261.61718750, 76035.55468750, -92912.59375000, + // 40474.20703125 + VLOAD_32(v24, 0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, + 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, + 0x471e1a35); + // -10044.0368110413110116, 13040.9349537673260784, + // 88916.1136409099854063, 79168.4367756713472772, + // 21611.0950133731239475, -26455.6752808090968756, + // 5979.6755084589240141, -99733.4556307629245566, + // 85141.1192070578690618, -87838.0155233480472816, + // 
53604.5772563865466509, -30101.3490022116457112, + // 80638.7360704737366177, -75019.8948306038219016, + // 63887.5576457676361315, 1225.3713199536578031 + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, + 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, + 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, + 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, + 0x4093257c3b4c4540); + asm volatile("vfwmacc.vv v16, v8, v24"); + // 173226551.6662319302558899, 1042805506.3236714601516724, + // 120372266.5559626817703247, 1005827715.3891682624816895, + // -886236534.5412018299102783, -3187924887.6156492233276367, + // -1609379613.2016887664794922, 1882718061.3047757148742676, + // 108196668.3968845903873444, -216706917.7953008711338043, + // 3608622049.7550821304321289, 2122430044.9128692150115967, + // -6517279175.0161266326904297, 3100051665.0599727630615234, + // -8910693010.2487506866455078, 3887273396.3922243118286133 + VCMP_U64(2, v16, 0x41a4a6746f551c5a, 0x41cf13f981296e11, 0x419cb2f0aa394e48, + 0x41cdf9db41b1d044, 0xc1ca6972bb45461a, 0xc1e7c07bf2f3b366, + 0xc1d7fb4bc74ce878, 0x41dc0dffdb538172, 0x4199cbccf19668ea, + 0xc1a9d55ecb9731ad, 0x41eae2e67c3829a2, 0x41dfa06d973a6c73, + 0xc1f8475c9c70420e, 0x41e718e11a21eb4c, 0xc20098f31491fd71, + 0x41ecf662b68c8d1a); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, + // -26.4688, -48.1250, -32.9688, 7.8750, + // -38.4375, 51.5625, 46.2188, -75.1875, 23.5625, -77.8125 + VLOAD_16(v4, 0x5299, 0x54a3, 0x35b4, 0x5618, 0x47d9, 0x52a7, 0xce9e, 0xd204, + 0xd01f, 0x47e0, 0xd0ce, 0x5272, 0x51c7, 0xd4b3, 0x4de4, 0xd4dd); + // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, + // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, + // -54.6875, 61.7500, 38.3438, 
95.8125, 10.0938 + VLOAD_16(v12, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); + VLOAD_8(v0, 0xAA, 0xAA); + // -83.87223053, -48.34465408, 70.48658752, -1.26614821, + // -24.13150024, -65.13838196, + // 0.84671319, 34.34510040, 72.80049896, + // -86.23424530, 25.52654839, -68.44364929, 9.81109142, + // -85.20966339, -81.00300598, 16.25512505 + VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, + 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, + 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, + 0x41820a7f); + asm volatile("vfwmacc.vv v8, v4, v12, v0.t"); + // -83.87223053, 2068.31738281, 70.48658752, -8831.11035156, + // -24.13150024, -3461.15991211, + // 0.84671319, 18.34259796, 72.80049896, 2.17493439, 25.52654839, + // -2888.26782227, 9.81109142, -2968.18041992, -81.00300598, + // -769.16479492 + VCMP_U32(3, v8, 0xc2a7be95, 0x45014514, 0x428cf922, 0xc609fc71, 0xc1c10d50, + 0xc558528f, 0x3f58c232, 0x4192bda4, 0x429199db, 0x400b3220, + 0x41cc365f, 0xc5348449, 0x411cfa3b, 0xc53982e3, 0xc2a2018a, + 0xc4404a8c); + + VSET(16, e32, m4); + // -3306.98510742, -33314.88281250, 64578.31250000, + // 11648.08203125, -92704.16406250, 33998.11328125, + // 23406.90429688, 44169.36718750, -1206.53601074, + // 4568.00048828, -89687.13281250, 47865.25781250, + // -72205.21875000, 40772.06640625, 95904.72656250, + // 96043.19531250 + VLOAD_32(v8, 0xc54eafc3, 0xc70222e2, 0x477c4250, 0x46360054, 0xc7b51015, + 0x4704ce1d, 0x46b6ddcf, 0x472c895e, 0xc496d127, 0x458ec001, + 0xc7af2b91, 0x473af942, 0xc78d069c, 0x471f4411, 0x47bb505d, + 0x47bb9599); + // -52385.05468750, -31301.09960938, 1862.59667969, + // 86344.56250000, 9560.06835938, -93766.92187500, + // -68756.87500000, 42627.23046875, -89604.89062500, + // -47420.98437500, -40235.07421875, 44342.39453125, + // 90261.61718750, 76035.55468750, -92912.59375000, + // 40474.20703125 + VLOAD_32(v24, 0xc74ca10e, 
0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, + 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, + 0x471e1a35); + VLOAD_8(v0, 0xAA, 0xAA); + // -10044.0368110413110116, 13040.9349537673260784, + // 88916.1136409099854063, 79168.4367756713472772, + // 21611.0950133731239475, -26455.6752808090968756, + // 5979.6755084589240141, -99733.4556307629245566, + // 85141.1192070578690618, -87838.0155233480472816, + // 53604.5772563865466509, -30101.3490022116457112, + // 80638.7360704737366177, -75019.8948306038219016, + // 63887.5576457676361315, 1225.3713199536578031 + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, + 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, + 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, + 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, + 0x4093257c3b4c4540); + asm volatile("vfwmacc.vv v16, v8, v24, v0.t"); + // -10044.0368110413110116, 1042805506.3236714601516724, + // 88916.1136409099854063, 1005827715.3891682624816895, + // 21611.0950133731239475, -3187924887.6156492233276367, + // 5979.6755084589240141, 1882718061.3047757148742676, + // 85141.1192070578690618, -216706917.7953008711338043, + // 53604.5772563865466509, 2122430044.9128692150115967, + // 80638.7360704737366177, 3100051665.0599727630615234, + // 63887.5576457676361315, 3887273396.3922243118286133 + VCMP_U64(4, v16, 0xc0c39e04b6396548, 0x41cf13f981296e11, 0x40f5b541d179217e, + 0x41cdf9db41b1d044, 0x40d51ac614b2f890, 0xc1e7c07bf2f3b366, + 0x40b75bacee1f5340, 0x41dc0dffdb538172, 0x40f4c951e845a8f0, + 0xc1a9d55ecb9731ad, 0x40ea2c9278e262b4, 0x41dfa06d973a6c73, + 0x40f3afebc6f1d544, 0x41e718e11a21eb4c, 0x40ef31f1d83befd8, + 0x41ecf662b68c8d1a); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -15.3750, 
11.9375, + // -31.7656, 27.0625, 3.0684, 71.2500, 63.2500, -95.6875, + // -62.0625, -27.7344, 55.5312, -62.1875, -42.1875, + // -95.3125, 27.1406, -16.9219 + VLOAD_16(v4, 0xcbb0, 0x49f8, 0xcff1, 0x4ec4, 0x4223, 0x5474, 0x53e8, 0xd5fb, + 0xd3c2, 0xceef, 0x52f1, 0xd3c6, 0xd146, 0xd5f5, 0x4ec9, 0xcc3b); + // 32.2812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5009); + // -70.22966003, 68.36327362, -69.75650787, -92.51078796, + // -53.56798553, + // -92.09814453, 92.33961487, 42.48206329, 99.15431976, + // -5.94871950, -55.92549133, 59.99367523, -45.05080032, + // -68.93397522, 55.13935089, -80.23659515 + VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, + 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, + 0xc2a07923); + asm volatile("vfwmacc.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // -566.55389404, 453.72070312, -1095.19055176, + // 781.10052490, 45.48249054, 2207.94091797, 2134.12866211, + // -3046.42993164, -1904.30078125, -901.24902344, 1736.69262695, + // -1947.49658203, -1406.91601562, -3145.74072266, 931.27264404, + // -626.49584961 + VCMP_U32(5, v8, 0xc40da373, 0x43e2dc40, 0xc488e619, 0x4443466f, 0x4235ee12, + 0x4509ff0e, 0x4505620f, 0xc53e66e1, 0xc4ee09a0, 0xc4614ff0, + 0x44d9162a, 0xc4f36fe4, 0xc4afdd50, 0xc5449bda, 0x4468d173, + 0xc41c9fbc); + + VSET(16, e32, m4); + double dscalar_32; + // -260866.17187500, -221967.43750000, -907157.25000000, + // 754760.87500000, -585546.12500000, 260611.84375000, + // -768453.25000000, -117569.82812500, -469705.78125000, + // 775094.50000000, 533114.81250000, -798136.87500000, + // 66693.82812500, 246179.67187500, 728220.87500000, + // -749270.75000000 + VLOAD_32(v8, 0xc87ec08b, 0xc858c3dc, 0xc95d7954, 0x4938448e, 0xc90ef4a2, + 0x487e80f6, 0xc93b9c54, 0xc7e5a0ea, 0xc8e55939, 0x493d3b68, + 0x490227ad, 0xc942db8e, 0x478242ea, 0x487068eb, 0x4931c9ce, + 0xc936ed6c); + // -164832.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc820f80d); + // 
-730249.9193813583115116, -885955.2111547881504521, + // -739704.0702666083816439, 991252.9466537751723081, + // -543412.1947198503185064, 859135.3883249030914158, + // 862259.4763332824222744, 331294.3916525302920491, + // 936699.0835190876387060, -813722.4244660194963217, + // -602138.5575914122164249, 253718.7884360067546368, + // 7255.4825419568223879, 957493.0229552322998643, + // -446793.8022573012858629, -757660.7323241395642981 + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, + 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, + 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, + 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, + 0xc1271f3976f3308b); + asm volatile("vfwmacc.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 42998415581.0217819213867188, 36586495789.9245910644531250, + // 149527988394.2461547851562500, + // -124407906605.8560791015625000, 96516314402.8619232177734375, + // -42956365230.3924331665039062, 126666704455.5427398681640625, + // 19379625085.2629890441894531, 77423575443.0703430175781250, + // -127761347787.4947814941406250, + // -87875091201.5038757324218750, 131558913220.3411712646484375, + // -10993283369.2012958526611328, -40577380186.7228927612304688, + // -120034697981.6674957275390625, 123503190798.8887634277343750 + VCMP_U64(6, v16, 0x422405cf81ba0b27, 0x422109733e5bd964, 0x4241684804551f82, + 0xc23cf74a012ddb28, 0x423678d21522dca7, 0xc22400cc3b5cc8ed, + 0x423d7dec86478af1, 0x42120c7671f50d4d, 0x423206ce01931202, + 0xc23dbf2b74cb7eaa, 0xc23475c37b0180fe, 0x423ea185b4c45757, + 0xc2047a0189499c41, 0xc222e5335eb5721f, 0xc23bf2a022fdaae1, + 0x423cc15d230ee386); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -15.3750, 11.9375, + // -31.7656, 27.0625, 3.0684, 71.2500, 63.2500, -95.6875, + // -62.0625, -27.7344, 55.5312, 
-62.1875, -42.1875, + // -95.3125, 27.1406, -16.9219 + VLOAD_16(v4, 0xcbb0, 0x49f8, 0xcff1, 0x4ec4, 0x4223, 0x5474, 0x53e8, 0xd5fb, + 0xd3c2, 0xceef, 0x52f1, 0xd3c6, 0xd146, 0xd5f5, 0x4ec9, 0xcc3b); + // 32.2812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5009); + VLOAD_8(v0, 0xAA, 0xAA); + // -70.22966003, 68.36327362, -69.75650787, -92.51078796, + // -53.56798553, + // -92.09814453, 92.33961487, 42.48206329, 99.15431976, + // -5.94871950, -55.92549133, 59.99367523, -45.05080032, + // -68.93397522, 55.13935089, -80.23659515 + VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, + 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, + 0xc2a07923); + asm volatile("vfwmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -70.22966003, 453.72070312, -69.75650787, 781.10052490, + // -53.56798553, 2207.94091797, 92.33961487, + // -3046.42993164, 99.15431976, -901.24902344, -55.92549133, + // -1947.49658203, -45.05080032, -3145.74072266, 55.13935089, + // -626.49584961 + VCMP_U32(7, v8, 0xc28c7596, 0x43e2dc40, 0xc28b8355, 0x4443466f, 0xc256459e, + 0x4509ff0e, 0x42b8ade2, 0xc53e66e1, 0x42c64f03, 0xc4614ff0, + 0xc25fb3b4, 0xc4f36fe4, 0xc2343405, 0xc5449bda, 0x425c8eb2, + 0xc41c9fbc); + + VSET(16, e32, m4); + double dscalar_32; + // -260866.17187500, -221967.43750000, -907157.25000000, + // 754760.87500000, -585546.12500000, 260611.84375000, + // -768453.25000000, -117569.82812500, -469705.78125000, + // 775094.50000000, 533114.81250000, -798136.87500000, + // 66693.82812500, 246179.67187500, 728220.87500000, + // -749270.75000000 + VLOAD_32(v8, 0xc87ec08b, 0xc858c3dc, 0xc95d7954, 0x4938448e, 0xc90ef4a2, + 0x487e80f6, 0xc93b9c54, 0xc7e5a0ea, 0xc8e55939, 0x493d3b68, + 0x490227ad, 0xc942db8e, 0x478242ea, 0x487068eb, 0x4931c9ce, + 0xc936ed6c); + // -164832.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc820f80d); + VLOAD_8(v0, 0xAA, 0xAA); + // -730249.9193813583115116, -885955.2111547881504521, + 
// -739704.0702666083816439, 991252.9466537751723081, + // -543412.1947198503185064, 859135.3883249030914158, + // 862259.4763332824222744, 331294.3916525302920491, + // 936699.0835190876387060, -813722.4244660194963217, + // -602138.5575914122164249, 253718.7884360067546368, + // 7255.4825419568223879, 957493.0229552322998643, + // -446793.8022573012858629, -757660.7323241395642981 + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, + 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, + 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, + 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, + 0xc1271f3976f3308b); + asm volatile("vfwmacc.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -730249.9193813583115116, 36586495789.9245910644531250, + // -739704.0702666083816439, -124407906605.8560791015625000, + // -543412.1947198503185064, -42956365230.3924331665039062, + // 862259.4763332824222744, 19379625085.2629890441894531, + // 936699.0835190876387060, -127761347787.4947814941406250, + // -602138.5575914122164249, 131558913220.3411712646484375, + // 7255.4825419568223879, -40577380186.7228927612304688, + // -446793.8022573012858629, 123503190798.8887634277343750 + VCMP_U64(8, v16, 0xc1264913d6b92745, 0x422109733e5bd964, 0xc12692f023f9fc22, + 0xc23cf74a012ddb28, 0xc120956863b251fa, 0xc22400cc3b5cc8ed, + 0x412a5066f3e1f4bc, 0x42120c7671f50d4d, 0x412c95f62ac3038c, + 0xc23dbf2b74cb7eaa, 0xc12260351d7c9f20, 0x423ea185b4c45757, + 0x40bc577b87dea380, 0xc222e5335eb5721f, 0xc11b45273582f020, + 0x423cc15d230ee386); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c new file mode 100644 index 000000000..15c34d054 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c @@ -0,0 +1,353 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, + // -56.3125, 71.0000, -74.5625, -38.7188, + // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 + VLOAD_16(v4, 0xd5d0, 0xd2e4, 0xd449, 0x54c5, 0xd3a7, 0x521e, 0xd30a, 0x5470, + 0xd4a9, 0xd0d7, 0xd24b, 0x55d6, 0x5504, 0xc76a, 0x55dd, 0x5533); + // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, + // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, + // -81.8125, 67.6250, 29.4531, -64.9375 + VLOAD_16(v12, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); + // 31.29529381, -66.12346649, + // -48.59321213, 21.66906929, 92.08473206, 1.95985305, + // -96.55027771, 77.65225220, -82.48660278, + // -35.32508850, 42.91923141, + // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 + VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, + 0x422bad4b, 0xc2994d28, 0x41c91af9, 0x4291c947, 0x41ab8619, + 0x428b6ec5); + asm volatile("vfwmsac.vv v8, v4, v12"); + // 5551.61083984, 1814.61950684, 5139.35888672, -90.93905640, + // -1958.30004883, -2754.69433594, 368.65405273, 2356.31640625, + // -4889.89990234, -1433.56750488, -3979.17114258, 600.42608643, + // -6590.59130859, -574.26910400, 2741.63085938, -5471.70458984 + VCMP_U32(1, v8, 0x45ad7ce3, 0x44e2d3d3, 0x45a09adf, 0xc2b5e0cc, 0xc4f4c99a, + 0xc52c2b1c, 0x43b853b8, 0x45134510, 0xc598cf33, 0xc4b33229, + 0xc578b2bd, 
0x44161b45, 0xc5cdf4bb, 0xc40f9139, 0x452b5a18, + 0xc5aafda3); + + VSET(16, e32, m4); + // -71423.96093750, -46625.21875000, -59851.39453125, + // -43461.99218750, -10255.72753906, 37671.59765625, + // 96842.05468750, 33293.05859375, 27126.79296875, + // -27343.42187500, 26815.15429688, 28654.72070312, + // -5699.91699219, 70582.03906250, -5936.72802734, + // 43479.90234375 + VLOAD_32(v8, 0xc78b7ffb, 0xc7362138, 0xc769cb65, 0xc729c5fe, 0xc6203ee9, + 0x47132799, 0x47bd2507, 0x47020d0f, 0x46d3ed96, 0xc6d59ed8, + 0x46d17e4f, 0x46dfdd71, 0xc5b21f56, 0x4789db05, 0xc5b985d3, + 0x4729d7e7); + // 93657.15625000, -28369.10156250, -42169.91406250, + // -21377.95507812, 16308.38183594, 64517.84375000, + // -12469.71679688, -76994.53125000, -33687.83593750, + // -84006.54687500, 31506.48437500, 2731.77905273, + // -20272.41992188, 53550.01953125, -85441.62500000, + // -33418.07031250 + VLOAD_32(v24, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, + 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, + 0xc7028a12); + // 15054.1952512034331448, -39042.3922682931588497, + // -83554.3539477824524511, 35787.9235785690543707, + // -34715.2784411938628182, 35880.5352577352605294, + // -52433.9701052222590079, -40831.3148960549369804, + // -3569.6808186589187244, 77018.1414445060363505, + // 58906.1301468654128257, -84146.7844421620393405, + // -23969.5482366856886074, 92255.7186088700836990, + // -35519.3091108352309675, -65623.9480113173485734 + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, + 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, + 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, + 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, + 0xc0f0057f2b0dea44); + asm volatile("vfwmsac.vv v16, v8, v24"); + // -6689380123.7125854492187500, 1322754608.4847974777221680, + // 
2524011718.2575426101684570, 929092728.6666160821914673, + // -167219605.4339296817779541, 2430454370.8635458946228027, + // -1207540562.0105009078979492, -2563342608.9896693229675293, + // -913839381.3609598875045776, 2296949433.3236432075500488, + // 844792333.7375594377517700, 78362512.5631930530071259, + // 115575080.3336923867464066, 3779577314.6337165832519531, + // 507279209.1484052538871765, -1452948809.7560596466064453 + VCMP_U64(2, v16, 0xc1f8eb7d71bb66c0, 0x41d3b5e88c1f06ec, 0x41e2ce2b98c83dca, + 0x41cbb06a3c5553ad, 0xc1a3ef232ade2c08, 0x41e21bb94c5ba22b, + 0xc1d1fe66d480ac0c, 0xc1e31930221fab5f, 0xc1cb3c0a8aae33ef, + 0x41e11d15572a5b49, 0x41c92d4106de6859, 0x4192aede4240b5ae, + 0x419b8e24a155b375, 0x41ec28f8bc544768, 0x41be3c776925fde3, + 0xc1d5a68f52706348); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, + // -56.3125, 71.0000, -74.5625, -38.7188, + // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 + VLOAD_16(v4, 0xd5d0, 0xd2e4, 0xd449, 0x54c5, 0xd3a7, 0x521e, 0xd30a, 0x5470, + 0xd4a9, 0xd0d7, 0xd24b, 0x55d6, 0x5504, 0xc76a, 0x55dd, 0x5533); + // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, + // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, + // -81.8125, 67.6250, 29.4531, -64.9375 + VLOAD_16(v12, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); + VLOAD_8(v0, 0xAA, 0xAA); + // 31.29529381, -66.12346649, + // -48.59321213, 21.66906929, 92.08473206, 1.95985305, + // -96.55027771, 77.65225220, -82.48660278, + // -35.32508850, 42.91923141, + // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 + VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, + 0x422bad4b, 0xc2994d28, 0x41c91af9, 
0x4291c947, 0x41ab8619, + 0x428b6ec5); + asm volatile("vfwmsac.vv v8, v4, v12, v0.t"); + // 31.29529381, 1814.61950684, -48.59321213, + // -90.93905640, 92.08473206, -2754.69433594, -96.55027771, + // 2356.31640625, -82.48660278, -1433.56750488, 42.91923141, + // 600.42608643, 25.13817024, -574.26910400, 21.44047737, + // -5471.70458984 + VCMP_U32(3, v8, 0x41fa5cc3, 0x44e2d3d3, 0xc2425f73, 0xc2b5e0cc, 0x42b82b62, + 0xc52c2b1c, 0xc2c119be, 0x45134510, 0xc2a4f924, 0xc4b33229, + 0x422bad4b, 0x44161b45, 0x41c91af9, 0xc40f9139, 0x41ab8619, + 0xc5aafda3); + + VSET(16, e32, m4); + // -71423.96093750, -46625.21875000, -59851.39453125, + // -43461.99218750, -10255.72753906, 37671.59765625, + // 96842.05468750, 33293.05859375, 27126.79296875, + // -27343.42187500, 26815.15429688, 28654.72070312, + // -5699.91699219, 70582.03906250, -5936.72802734, + // 43479.90234375 + VLOAD_32(v8, 0xc78b7ffb, 0xc7362138, 0xc769cb65, 0xc729c5fe, 0xc6203ee9, + 0x47132799, 0x47bd2507, 0x47020d0f, 0x46d3ed96, 0xc6d59ed8, + 0x46d17e4f, 0x46dfdd71, 0xc5b21f56, 0x4789db05, 0xc5b985d3, + 0x4729d7e7); + // 93657.15625000, -28369.10156250, -42169.91406250, + // -21377.95507812, 16308.38183594, 64517.84375000, + // -12469.71679688, -76994.53125000, -33687.83593750, + // -84006.54687500, 31506.48437500, 2731.77905273, + // -20272.41992188, 53550.01953125, -85441.62500000, + // -33418.07031250 + VLOAD_32(v24, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, + 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, + 0xc7028a12); + VLOAD_8(v0, 0xAA, 0xAA); + // 15054.1952512034331448, -39042.3922682931588497, + // -83554.3539477824524511, 35787.9235785690543707, + // -34715.2784411938628182, 35880.5352577352605294, + // -52433.9701052222590079, -40831.3148960549369804, + // -3569.6808186589187244, 77018.1414445060363505, + // 58906.1301468654128257, -84146.7844421620393405, + // -23969.5482366856886074, 92255.7186088700836990, + 
// -35519.3091108352309675, -65623.9480113173485734 + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, + 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, + 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, + 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, + 0xc0f0057f2b0dea44); + asm volatile("vfwmsac.vv v16, v8, v24, v0.t"); + // 15054.1952512034331448, 1322754608.4847974777221680, + // -83554.3539477824524511, 929092728.6666160821914673, + // -34715.2784411938628182, 2430454370.8635458946228027, + // -52433.9701052222590079, -2563342608.9896693229675293, + // -3569.6808186589187244, 2296949433.3236432075500488, + // 58906.1301468654128257, 78362512.5631930530071259, + // -23969.5482366856886074, 3779577314.6337165832519531, + // -35519.3091108352309675, -1452948809.7560596466064453 + VCMP_U64(4, v16, 0x40cd6718fdfdcea0, 0x41d3b5e88c1f06ec, 0xc0f46625a9c52662, + 0x41cbb06a3c5553ad, 0xc0e0f368e8fd81b0, 0x41e21bb94c5ba22b, + 0xc0e99a3f0b1a1b69, 0xc1e31930221fab5f, 0xc0abe35c94436520, + 0x41e11d15572a5b49, 0x40ecc3442a29c254, 0x4192aede4240b5ae, + 0xc0d76863164f52e0, 0x41ec28f8bc544768, 0xc0e157e9e43c6805, + 0xc1d5a68f52706348); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, + // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, + // -80.5000, 78.3125, -38.0625, 30.0625, -78.6250 + VLOAD_16(v4, 0x4814, 0x415e, 0x5317, 0x50cf, 0x503a, 0xd531, 0x50b9, 0xcf06, + 0xd022, 0x53a5, 0x4a8b, 0xd508, 0x54e5, 0xd0c2, 0x4f84, 0xd4ea); + // 39.8125 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x50fa); + // 56.66989136, 59.83663559, -8.21133614, -19.17305374, + // -93.35797119, -34.25491333, 46.99548721, + // -6.17113161, 55.22229004, 7.96844339, -92.84493256, + // -90.90106201, 78.59468842, -58.67407608, 39.90958405, + // -93.58789825 + 
VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, + 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, + 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, + 0xc2bb2d01); + asm volatile("vfwmsac.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 268.05081177, 47.00394058, 2266.32666016, 1550.71020508, + // 1439.51806641, -3272.67089844, 1457.17053223, + // -1112.31127930, -1371.52307129, 2426.81469727, 613.82879639, + // -3114.00512695, 3039.22167969, -1456.68920898, + // 1156.95373535, -3036.66992188 + VCMP_U32(5, v8, 0x43860681, 0x423c0409, 0x450da53a, 0x44c1d6ba, 0x44b3f094, + 0xc54c8abc, 0x44b62575, 0xc48b09f6, 0xc4ab70bd, 0x4517ad09, + 0x4419750b, 0xc542a015, 0x453df38c, 0xc4b6160e, 0x44909e85, + 0xc53dcab8); + + VSET(16, e32, m4); + double dscalar_32; + // 580253.06250000, -300331.93750000, 485801.21875000, + // -751037.87500000, -360868.65625000, 893035.68750000, + // 541162.00000000, 417622.93750000, -933287.18750000, + // -790074.12500000, 496987.96875000, 455066.96875000, + // -928285.18750000, 300725.40625000, -645096.93750000, + // 102530.55468750 + VLOAD_32(v8, 0x490da9d1, 0xc892a57e, 0x48ed3527, 0xc9375bde, 0xc8b03495, + 0x495a06bb, 0x49041ea0, 0x48cbeade, 0xc963da73, 0xc940e3a2, + 0x48f2ab7f, 0x48de335f, 0xc962a1d3, 0x4892d6ad, 0xc91d7e8f, + 0x47c84147); + // 670995.56250000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4923d139); + // 579132.0708449089433998, 521241.3016625398304313, + // 409779.0302067114971578, 454935.4394149139989167, + // -640831.0776052488945425, 262502.9360184965189546, + // -132061.7241549796890467, -523289.4277524493518285, + // 796635.9535234714858234, 170970.3947326899506152, + // -520724.0386287728324533, -616193.5881990450434387, + // 79952.4583538805600256, -869849.3916852036491036, + // 535808.2751473840326071, -306070.6657954099355265 + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, + 0xc1001eedcb11c418, 
0xc11ff065b604bcf3, 0x41284fb7e8343a7c, + 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, + 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, + 0xc112ae5aa9c6459e); + asm volatile("vfwmsac.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 389346650932.4642944335937500, + // -201521918580.8290100097656250, + // 325970052259.3115844726562500, + // -503943536329.8690795898437500, + // -242140626158.0102844238281250, + // 599222720963.7006835937500000, 363117432655.3491821289062500, + // 280223661150.1425781250000000, + // -626232357986.5589599609375000, + // -530136402891.4650268554687500, + // 333477242371.1773071289062500, 305348532865.1643676757812500, + // -622875321499.4388427734375000, + // 201786282974.1514587402343750, + // -432857718253.1149902343750000, 68797853286.6418762207031250 + VCMP_U64(6, v16, 0x4256a9b79acd1db7, 0xc24775d2393a6a1d, 0x4252f9551128d3f1, + 0xc25d555807b2779f, 0xc24c305a5e770151, 0x4261708ea338766c, + 0x425522df13d3d659, 0x42504fa86f178920, 0xc26239cb6e8c51e3, + 0xc25edba5e2f2ddc3, 0x42536932b980cb59, 0x4251c60c36a04a85, + 0xc26220c864936e0b, 0x42477db329ef1363, 0xc2593215277b475c, + 0x423004abee66a452); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, + // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, + // -80.5000, 78.3125, -38.0625, 30.0625, -78.6250 + VLOAD_16(v4, 0x4814, 0x415e, 0x5317, 0x50cf, 0x503a, 0xd531, 0x50b9, 0xcf06, + 0xd022, 0x53a5, 0x4a8b, 0xd508, 0x54e5, 0xd0c2, 0x4f84, 0xd4ea); + // 39.8125 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x50fa); + VLOAD_8(v0, 0xAA, 0xAA); + // 56.66989136, 59.83663559, -8.21133614, -19.17305374, + // -93.35797119, -34.25491333, 46.99548721, + // -6.17113161, 55.22229004, 7.96844339, -92.84493256, + // -90.90106201, 78.59468842, -58.67407608, 39.90958405, + // -93.58789825 + VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 
0xc199626a, 0xc2bab748, + 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, + 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, + 0xc2bb2d01); + asm volatile("vfwmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 56.66989136, 47.00394058, -8.21133614, 1550.71020508, + // -93.35797119, -3272.67089844, 46.99548721, + // -1112.31127930, 55.22229004, 2426.81469727, -92.84493256, + // -3114.00512695, 78.59468842, -1456.68920898, 39.90958405, + // -3036.66992188 + VCMP_U32(7, v8, 0x4262adf8, 0x423c0409, 0xc10361a2, 0x44c1d6ba, 0xc2bab748, + 0xc54c8abc, 0x423bfb61, 0xc48b09f6, 0x425ce3a0, 0x4517ad09, + 0xc2b9b09b, 0xc542a015, 0x429d307b, 0xc4b6160e, 0x421fa36a, + 0xc53dcab8); + + VSET(16, e32, m4); + double dscalar_32; + // 580253.06250000, -300331.93750000, 485801.21875000, + // -751037.87500000, -360868.65625000, 893035.68750000, + // 541162.00000000, 417622.93750000, -933287.18750000, + // -790074.12500000, 496987.96875000, 455066.96875000, + // -928285.18750000, 300725.40625000, -645096.93750000, + // 102530.55468750 + VLOAD_32(v8, 0x490da9d1, 0xc892a57e, 0x48ed3527, 0xc9375bde, 0xc8b03495, + 0x495a06bb, 0x49041ea0, 0x48cbeade, 0xc963da73, 0xc940e3a2, + 0x48f2ab7f, 0x48de335f, 0xc962a1d3, 0x4892d6ad, 0xc91d7e8f, + 0x47c84147); + // 670995.56250000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4923d139); + VLOAD_8(v0, 0xAA, 0xAA); + // 579132.0708449089433998, 521241.3016625398304313, + // 409779.0302067114971578, 454935.4394149139989167, + // -640831.0776052488945425, 262502.9360184965189546, + // -132061.7241549796890467, -523289.4277524493518285, + // 796635.9535234714858234, 170970.3947326899506152, + // -520724.0386287728324533, -616193.5881990450434387, + // 79952.4583538805600256, -869849.3916852036491036, + // 535808.2751473840326071, -306070.6657954099355265 + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, + 0xc1001eedcb11c418, 0xc11ff065b604bcf3, 
0x41284fb7e8343a7c, + 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, + 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, + 0xc112ae5aa9c6459e); + asm volatile("vfwmsac.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 579132.0708449089433998, -201521918580.8290100097656250, + // 409779.0302067114971578, -503943536329.8690795898437500, + // -640831.0776052488945425, 599222720963.7006835937500000, + // -132061.7241549796890467, 280223661150.1425781250000000, + // 796635.9535234714858234, -530136402891.4650268554687500, + // -520724.0386287728324533, 305348532865.1643676757812500, + // 79952.4583538805600256, 201786282974.1514587402343750, + // 535808.2751473840326071, 68797853286.6418762207031250 + VCMP_U64(8, v16, 0x4121ac782445c8ae, 0xc24775d2393a6a1d, 0x411902cc1eee8218, + 0xc25d555807b2779f, 0xc1238e7e27bbe00c, 0x4261708ea338766c, + 0xc1001eedcb11c418, 0x42504fa86f178920, 0x41284fb7e8343a7c, + 0xc25edba5e2f2ddc3, 0xc11fc850278e4d10, 0x4251c60c36a04a85, + 0x40f38507556ae0f0, 0x42477db329ef1363, 0x41205a008ce01e30, + 0x423004abee66a452); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c new file mode 100644 index 000000000..3c901db0f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c @@ -0,0 +1,258 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, + // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, + // -93.2500, 40.6875, -32.2812, -36.8125 + VLOAD_16(v4, 0xd311, 0x55f6, 0x5566, 0xd02e, 0x44c4, 0x535b, 0xd507, 0xd607, + 0x54a6, 0xd5cc, 0xd329, 0xd5a1, 0xd5d4, 0x5116, 0xd009, 0xd09a); + // 96.4375, -98.8125, -49.1250, -78.8750, + // -5.9180, 32.8750, 32.8750, -74.8125, + // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, + // -11.2500, 16.3594, 28.6094 + VLOAD_16(v8, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + 0xc930, 0x50f3, 0x5169, 0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); + asm volatile("vfwmul.vv v12, v4, v8"); + // -5451.73242188, -9424.24218750, -4243.17187500, 2637.38281250, + // -28.20281982, 1934.48828125, -2644.38281250, 7214.73046875, + // -771.64062500, -3672.32031250, -2479.20410156, -1355.86279297, + // 2975.25781250, -457.73437500, -528.10107422, -1053.18261719 + VCMP_U32(1, v12, 0xc5aa5ddc, 0xc61340f8, 0xc5849960, 0x4524d620, 0xc1e19f60, + 0x44f1cfa0, 0xc5254620, 0x45e175d8, 0xc440e900, 0xc5658520, + 0xc51af344, 0xc4a97b9c, 0x4539f420, 0xc3e4de00, 0xc4040678, + 0xc483a5d8); + + VSET(16, e32, m4); + // -89875.40625000, 87678.49218750, -37342.58593750, + // -47507.81640625, -80717.72656250, 2230.02978516, + // -68805.99218750, 79032.60156250, -43338.95703125, + // 42250.94531250, -6447.03955078, 25544.21679688, + // 5945.30566406, -47409.30468750, -43415.17187500, + // 92669.35156250 + VLOAD_32(v8, 0xc7af89b4, 0x47ab3f3f, 0xc711de96, 0xc73993d1, 0xc79da6dd, + 0x450b607a, 0xc78662ff, 0x479a5c4d, 0xc7294af5, 0x47250af2, + 0xc5c97851, 0x46c7906f, 0x45b9ca72, 0xc739314e, 0xc729972c, + 0x47b4fead); + // 99630.39843750, 37076.73437500, -66118.01562500, + 
// -99829.85156250, -78879.75000000, 75633.85937500, + // -90564.15625000, -84653.48437500, 34630.80859375, + // 85817.48437500, -23627.74023438, -79522.11718750, + // 51590.63671875, 7574.55957031, -93117.57812500, + // 28056.31054688 + VLOAD_32(v16, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, + 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, + 0x46db309f); + asm volatile("vfwmul.vv v24, v8, v16"); + // -8954322534.4196777343750000, 3250832165.2364501953125000, + // 2469017680.4935302734375000, 4742698259.8944396972656250, + // 6366994091.8183593750000000, 168665759.1725692749023438, + // 6231356627.4050292968750000, -6690385101.4866943359375000, + // -1500863125.6019744873046875, 3625869839.1844482421875000, + // 152328975.7866010665893555, -2031330201.5839996337890625, + // 306722104.6965751647949219, -359104602.5425643920898438, + // 4042715658.8806152343750000, 2599960105.6150360107421875 + VCMP_U64(2, v24, 0xc200adc0f3335b80, 0x41e8387864a79100, 0x41e265470a0fcb00, + 0x41f1aafd513e4fa0, 0x41f7b809eabd1800, 0x41a41b453e585b00, + 0x41f736af4d367b00, 0xc1f8ec72ccd7c980, 0xc1d65d56a56686c0, + 0x41eb03cc41e5e700, 0x41a228b61f92bd60, 0xc1de44e8e6656040, + 0x41b2483538b252c0, 0xc1b567805a8ae580, 0x41ee1ede415c2e00, + 0x41e35f07c533ae60); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, + // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, + // -93.2500, 40.6875, -32.2812, -36.8125 + VLOAD_16(v4, 0xd311, 0x55f6, 0x5566, 0xd02e, 0x44c4, 0x535b, 0xd507, 0xd607, + 0x54a6, 0xd5cc, 0xd329, 0xd5a1, 0xd5d4, 0x5116, 0xd009, 0xd09a); + // 96.4375, -98.8125, -49.1250, -78.8750, + // -5.9180, 32.8750, 32.8750, -74.8125, + // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, + // -11.2500, 16.3594, 28.6094 + VLOAD_16(v8, 0x5607, 
0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + 0xc930, 0x50f3, 0x5169, 0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwmul.vv v12, v4, v8, v0.t"); + // 0.00000000, -9424.24218750, 0.00000000, 2637.38281250, + // 0.00000000, 1934.48828125, 0.00000000, 7214.73046875, + // 0.00000000, -3672.32031250, 0.00000000, -1355.86279297, + // 0.00000000, -457.73437500, 0.00000000, -1053.18261719 + VCMP_U32(3, v12, 0x0, 0xc61340f8, 0x0, 0x4524d620, 0x0, 0x44f1cfa0, 0x0, + 0x45e175d8, 0x0, 0xc5658520, 0x0, 0xc4a97b9c, 0x0, 0xc3e4de00, 0x0, + 0xc483a5d8); + + VSET(16, e32, m4); + // -89875.40625000, 87678.49218750, -37342.58593750, + // -47507.81640625, -80717.72656250, 2230.02978516, + // -68805.99218750, 79032.60156250, -43338.95703125, + // 42250.94531250, -6447.03955078, 25544.21679688, + // 5945.30566406, -47409.30468750, -43415.17187500, + // 92669.35156250 + VLOAD_32(v8, 0xc7af89b4, 0x47ab3f3f, 0xc711de96, 0xc73993d1, 0xc79da6dd, + 0x450b607a, 0xc78662ff, 0x479a5c4d, 0xc7294af5, 0x47250af2, + 0xc5c97851, 0x46c7906f, 0x45b9ca72, 0xc739314e, 0xc729972c, + 0x47b4fead); + // 99630.39843750, 37076.73437500, -66118.01562500, + // -99829.85156250, -78879.75000000, 75633.85937500, + // -90564.15625000, -84653.48437500, 34630.80859375, + // 85817.48437500, -23627.74023438, -79522.11718750, + // 51590.63671875, 7574.55957031, -93117.57812500, + // 28056.31054688 + VLOAD_32(v16, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, + 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, + 0x46db309f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwmul.vv v24, v8, v16, v0.t"); + // 0.0000000000000000, 3250832165.2364501953125000, + // 0.0000000000000000, 4742698259.8944396972656250, + // 0.0000000000000000, 168665759.1725692749023438, + // 0.0000000000000000, -6690385101.4866943359375000, + // 
0.0000000000000000, 3625869839.1844482421875000, + // 0.0000000000000000, -2031330201.5839996337890625, + // 0.0000000000000000, -359104602.5425643920898438, + // 0.0000000000000000, 2599960105.6150360107421875 + VCMP_U64(4, v24, 0x0, 0x41e8387864a79100, 0x0, 0x41f1aafd513e4fa0, 0x0, + 0x41a41b453e585b00, 0x0, 0xc1f8ec72ccd7c980, 0x0, 0x41eb03cc41e5e700, + 0x0, 0xc1de44e8e6656040, 0x0, 0xc1b567805a8ae580, 0x0, + 0x41e35f07c533ae60); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, + // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, + // -40.5625, 1.6523, 79.6875, -36.2812, 33.5938, -72.4375 + VLOAD_16(v4, 0xd18d, 0xcec5, 0xcd6a, 0x54b9, 0xd548, 0xbc49, 0x5489, 0xd54a, + 0x553f, 0xd28b, 0xd112, 0x3e9c, 0x54fb, 0xd089, 0x5033, 0xd487); + // -58.9688 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd35f); + asm volatile("vfwmul.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 2618.58105469, 1596.76318359, 1277.04199219, + // -4455.82617188, 4982.85937500, 63.17257690, -4278.91992188, + // 4990.23046875, -4949.68945312, 3086.64550781, 2391.91992188, + // -97.43664551, -4699.07226562, 2139.45996094, -1980.98144531, + // 4271.54882812 + VCMP_U32(5, v8, 0x4523a94c, 0x44c7986c, 0x449fa158, 0xc58b3e9c, 0x459bb6e0, + 0x427cb0b8, 0xc585b75c, 0x459bf1d8, 0xc59aad84, 0x4540ea54, + 0x45157eb8, 0xc2c2df90, 0xc592d894, 0x4505b75c, 0xc4f79f68, + 0x45857c64); + + VSET(16, e32, m4); + double dscalar_32; + // -187018.20312500, -714032.18750000, -891429.25000000, + // -378265.00000000, 211566.90625000, 231934.78125000, + // 947047.75000000, -241945.03125000, -489658.75000000, + // -788001.68750000, -817411.37500000, -522168.21875000, + // -668021.56250000, 744069.12500000, -620354.68750000, + // 913454.68750000 + VLOAD_32(v8, 0xc836a28d, 0xc92e5303, 0xc959a254, 0xc8b8b320, 0x484e9bba, + 0x48627fb2, 0x4967367c, 0xc86c4642, 0xc8ef1758, 0xc940621b, + 0xc9479036, 
0xc8fef707, 0xc9231759, 0x4935a852, 0xc917742b, + 0x495f02eb); + // -50557.21484375 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7457d37); + asm volatile("vfwmul.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 9455119475.0827026367187500, 36099478708.7902832031250000, + // 45068180110.2529296875000000, 19124024872.8710937500000000, + // -10696233533.1087646484375000, -11725976565.3944091796875000, + // -47880096564.0400390625000000, 12232066925.2840576171875000, + // 24755782623.8720703125000000, 39839170612.1750488281250000, + // 41326042501.6000976562500000, 26399370819.9219970703125000, + // 33773309655.5700683593750000, -37618062611.2260742187500000, + // 31363405215.2648925781250000, -46181724885.9680175781250000 + VCMP_U64(6, v16, 0x42019c8d6398a960, 0x4220cf64a96994a0, 0x4224fc8bb51c8180, + 0x4211cf85d8a37c00, 0xc203ec5c91e8dec0, 0xc205d7619fab27c0, + 0xc2264bc096681480, 0x4206c8b43b6a45c0, 0x42170e3d0f7f7d00, + 0x42228d33006859a0, 0x42233e72bb0b3340, 0x42189619b90fb020, + 0x421f742f3b5e47c0, 0xc221846c2a2673c0, 0x421d359e567d0f40, + 0xc225814a65abefa0); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, + // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, + // -40.5625, 1.6523, 79.6875, -36.2812, 33.5938, -72.4375 + VLOAD_16(v4, 0xd18d, 0xcec5, 0xcd6a, 0x54b9, 0xd548, 0xbc49, 0x5489, 0xd54a, + 0x553f, 0xd28b, 0xd112, 0x3e9c, 0x54fb, 0xd089, 0x5033, 0xd487); + // -58.9688 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd35f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwmul.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 1596.76318359, 0.00000000, -4455.82617188, + // 0.00000000, 63.17257690, 0.00000000, 4990.23046875, + // 0.00000000, 3086.64550781, 0.00000000, -97.43664551, + // 0.00000000, 2139.45996094, 0.00000000, 4271.54882812 + VCMP_U32(7, v8, 0x0, 0x44c7986c, 0x0, 0xc58b3e9c, 0x0, 
0x427cb0b8, 0x0, + 0x459bf1d8, 0x0, 0x4540ea54, 0x0, 0xc2c2df90, 0x0, 0x4505b75c, 0x0, + 0x45857c64); + + VSET(16, e32, m4); + double dscalar_32; + // -187018.20312500, -714032.18750000, -891429.25000000, + // -378265.00000000, 211566.90625000, 231934.78125000, + // 947047.75000000, -241945.03125000, -489658.75000000, + // -788001.68750000, -817411.37500000, -522168.21875000, + // -668021.56250000, 744069.12500000, -620354.68750000, + // 913454.68750000 + VLOAD_32(v8, 0xc836a28d, 0xc92e5303, 0xc959a254, 0xc8b8b320, 0x484e9bba, + 0x48627fb2, 0x4967367c, 0xc86c4642, 0xc8ef1758, 0xc940621b, + 0xc9479036, 0xc8fef707, 0xc9231759, 0x4935a852, 0xc917742b, + 0x495f02eb); + // -50557.21484375 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7457d37); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwmul.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 36099478708.7902832031250000, + // 0.0000000000000000, 19124024872.8710937500000000, + // 0.0000000000000000, -11725976565.3944091796875000, + // 0.0000000000000000, 12232066925.2840576171875000, + // 0.0000000000000000, 39839170612.1750488281250000, + // 0.0000000000000000, 26399370819.9219970703125000, + // 0.0000000000000000, -37618062611.2260742187500000, + // 0.0000000000000000, -46181724885.9680175781250000 + VCMP_U64(8, v16, 0x0, 0x4220cf64a96994a0, 0x0, 0x4211cf85d8a37c00, 0x0, + 0xc205d7619fab27c0, 0x0, 0x4206c8b43b6a45c0, 0x0, 0x42228d33006859a0, + 0x0, 0x42189619b90fb020, 0x0, 0xc221846c2a2673c0, 0x0, + 0xc225814a65abefa0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c new file mode 100644 index 000000000..b036328fb --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c @@ -0,0 +1,352 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, + // -75.3750, 4.1953, 79.5625, -87.3750, + // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 + VLOAD_16(v4, 0x503e, 0x4fef, 0xd300, 0xd3c2, 0x54db, 0xc7bd, 0xd4b6, 0x4432, + 0x54f9, 0xd576, 0xd0a7, 0x55a8, 0x5441, 0x5451, 0x52c1, 0xd40b); + // -92.3125, -75.9375, 26.1094, + // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, + // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, + // -98.4375, 40.3438 + VLOAD_16(v12, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); + // 75.62483215, 29.19676971, 69.45310211, -70.36167145, + // -0.92180759, -77.84928131, 86.66299438, -43.34124756, + // -3.36894345, 7.33576536, -64.43717194, -80.48993683, + // -5.57641745, 89.34833527, -39.19780731, -55.64332581 + VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, + 0xc280dfd5, 0xc2a0fad9, 0xc0b27203, 0x42b2b259, 0xc21cca8e, + 0xc25e92c4); + asm volatile("vfwnmacc.vv v8, v4, v12"); + // 3057.23071289, 2380.63232422, 1392.67187500, + // -4875.24365234, -304.97271729, 366.34207153, + // 3740.97363281, 59.86029053, 4456.38281250, -2762.37866211, + // 2893.06225586, -6180.97900391, 567.62377930, 3536.43286133, + // 5357.89892578, 2665.37963867 + VCMP_U32(1, v8, 0x453f13b1, 0x4514ca1e, 0x44ae1580, 0xc59859f3, 0xc3987c82, + 0x43b72bc9, 0x4569cf94, 0x426f70f0, 0x458b4310, 0xc52ca60f, + 0x4534d0ff, 0xc5c127d5, 0x440de7ec, 0x455d06ed, 0x45a76f31, + 0x45269613); + + VSET(16, e32, m4); + // 24686.12304688, 45012.43359375, 
5708.16113281, + // -32777.98828125, 74121.31250000, -74877.15625000, + // -60082.02734375, 46400.20312500, -45509.65234375, + // -63994.57031250, -8693.70019531, 57683.04296875, + // 70424.14843750, 90967.72656250, 16158.18359375, + // -90782.41406250 + VLOAD_32(v8, 0x46c0dc3f, 0x472fd46f, 0x45b2614a, 0xc70009fd, 0x4790c4a8, + 0xc7923e94, 0xc76ab207, 0x47354034, 0xc731c5a7, 0xc779fa92, + 0xc607d6cd, 0x4761530b, 0x47898c13, 0x47b1abdd, 0x467c78bc, + 0xc7b14f35); + // -87108.13281250, -7857.04492188, -40309.92968750, + // -77418.73437500, 28954.62109375, -28385.13085938, + // 42368.34375000, -32644.74804688, 89327.02343750, + // -91567.60156250, -25929.78515625, -88250.83593750, + // -49992.60156250, 34217.12500000, 49765.98046875, + // 8088.22802734 + VLOAD_32(v24, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, + 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, + 0x45fcc1d3); + // -95159.7034957902651513, -25746.0480722606444033, + // -89272.3172746254567755, -57390.5516721799431252, + // 98139.9797031646012329, -66607.6782029465102823, + // 67788.7602550606534351, -90593.7788542728667380, + // -68056.0128839309472824, -37127.9738570771587547, + // 21060.8546093095501419, -76483.1707763712329324, + // 83261.7813338561973069, -99608.0446094776270911, + // 32602.1877863906847779, 52037.0826651407405734 + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, + 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, + 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, + 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, + 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, + 0x40e968a2a5315d80); + asm volatile("vfwnmacc.vv v16, v8, v24"); + // 2150457244.6964006423950195, 353690458.8370813727378845, + // 230184846.2258668541908264, -2537572977.5412845611572266, + // -2146252658.3886387348175049, -2125331270.8559157848358154, + // 
2545508198.9366445541381836, 1514813534.1183013916015625, + // 4065309837.5555171966552734, -5859792188.5645341873168945, + // -225446839.1319110989570618, 5090653244.5816955566406250, + // 3520603131.4329605102539062, -3112554462.7102732658386230, + // -804160451.3248255252838135, 734216828.7275727987289429 + VCMP_U64(2, v16, 0x41e005abf39648ea, 0x41b514e35ad64af7, 0x41ab70af1c73a4d2, + 0xc1e2e8094e315234, 0xc1dffb4ddc98df75, 0xc1dfab7ed1b6c753, + 0x41e2f72becddf8fe, 0x41d6928e17879240, 0x41ee49f691b1c6cc, + 0xc1f5d45553c90855, 0xc1aae0176e4389da, 0x41f2f6d343c94ea0, + 0x41ea3b047f6ddad0, 0xc1e730b9fbd6ba8f, 0xc1c7f741e1a993e2, + 0x41c5e1a13e5d211b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, + // -75.3750, 4.1953, 79.5625, -87.3750, + // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 + VLOAD_16(v4, 0x503e, 0x4fef, 0xd300, 0xd3c2, 0x54db, 0xc7bd, 0xd4b6, 0x4432, + 0x54f9, 0xd576, 0xd0a7, 0x55a8, 0x5441, 0x5451, 0x52c1, 0xd40b); + // -92.3125, -75.9375, 26.1094, + // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, + // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, + // -98.4375, 40.3438 + VLOAD_16(v12, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); + VLOAD_8(v0, 0xAA, 0xAA); + // 75.62483215, 29.19676971, 69.45310211, -70.36167145, + // -0.92180759, -77.84928131, 86.66299438, -43.34124756, + // -3.36894345, 7.33576536, -64.43717194, -80.48993683, + // -5.57641745, 89.34833527, -39.19780731, -55.64332581 + VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, + 0xc280dfd5, 0xc2a0fad9, 0xc0b27203, 0x42b2b259, 0xc21cca8e, + 0xc25e92c4); + asm volatile("vfwnmacc.vv v8, v4, v12, v0.t"); + // 75.62483215, 2380.63232422, 
69.45310211, -4875.24365234, + // -0.92180759, 366.34207153, 86.66299438, 59.86029053, + // -3.36894345, -2762.37866211, -64.43717194, -6180.97900391, + // -5.57641745, 3536.43286133, -39.19780731, 2665.37963867 + VCMP_U32(3, v8, 0x42973fea, 0x4514ca1e, 0x428ae7fd, 0xc59859f3, 0xbf6bfb95, + 0x43b72bc9, 0x42ad5374, 0x426f70f0, 0xc0579cc5, 0xc52ca60f, + 0xc280dfd5, 0xc5c127d5, 0xc0b27203, 0x455d06ed, 0xc21cca8e, + 0x45269613); + + VSET(16, e32, m4); + // 24686.12304688, 45012.43359375, 5708.16113281, + // -32777.98828125, 74121.31250000, -74877.15625000, + // -60082.02734375, 46400.20312500, -45509.65234375, + // -63994.57031250, -8693.70019531, 57683.04296875, + // 70424.14843750, 90967.72656250, 16158.18359375, + // -90782.41406250 + VLOAD_32(v8, 0x46c0dc3f, 0x472fd46f, 0x45b2614a, 0xc70009fd, 0x4790c4a8, + 0xc7923e94, 0xc76ab207, 0x47354034, 0xc731c5a7, 0xc779fa92, + 0xc607d6cd, 0x4761530b, 0x47898c13, 0x47b1abdd, 0x467c78bc, + 0xc7b14f35); + // -87108.13281250, -7857.04492188, -40309.92968750, + // -77418.73437500, 28954.62109375, -28385.13085938, + // 42368.34375000, -32644.74804688, 89327.02343750, + // -91567.60156250, -25929.78515625, -88250.83593750, + // -49992.60156250, 34217.12500000, 49765.98046875, + // 8088.22802734 + VLOAD_32(v24, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, + 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, + 0x45fcc1d3); + VLOAD_8(v0, 0xAA, 0xAA); + // -95159.7034957902651513, -25746.0480722606444033, + // -89272.3172746254567755, -57390.5516721799431252, + // 98139.9797031646012329, -66607.6782029465102823, + // 67788.7602550606534351, -90593.7788542728667380, + // -68056.0128839309472824, -37127.9738570771587547, + // 21060.8546093095501419, -76483.1707763712329324, + // 83261.7813338561973069, -99608.0446094776270911, + // 32602.1877863906847779, 52037.0826651407405734 + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, 
+ 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, + 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, + 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, + 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, + 0x40e968a2a5315d80); + asm volatile("vfwnmacc.vv v16, v8, v24, v0.t"); + // -95159.7034957902651513, 353690458.8370813727378845, + // -89272.3172746254567755, -2537572977.5412845611572266, + // 98139.9797031646012329, -2125331270.8559157848358154, + // 67788.7602550606534351, 1514813534.1183013916015625, + // -68056.0128839309472824, -5859792188.5645341873168945, + // 21060.8546093095501419, 5090653244.5816955566406250, + // 83261.7813338561973069, -3112554462.7102732658386230, + // 32602.1877863906847779, 734216828.7275727987289429 + VCMP_U64(4, v16, 0xc0f73b7b4184cd41, 0x41b514e35ad64af7, 0xc0f5cb85138e8ec3, + 0xc1e2e8094e315234, 0x40f7f5bfacdd39bc, 0xc1dfab7ed1b6c753, + 0x40f08ccc2a0135e2, 0x41d6928e17879240, 0xc0f09d8034c5c7e1, + 0xc1f5d45553c90855, 0x40d49136b1eb3ed8, 0x41f2f6d343c94ea0, + 0x40f453dc8057edfa, 0xc1e730b9fbd6ba8f, 0x40dfd68c04b135a8, + 0x41c5e1a13e5d211b); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 85.2500, -7.6602, -81.8125, -37.2500, + // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, + // -77.4375, 45.5312, -68.7500, 58.8438, -70.5625, -45.9375, + // -90.5000 + VLOAD_16(v4, 0x5554, 0xc7a9, 0xd51d, 0xd0a8, 0xd200, 0x4b7a, 0x4e7f, 0x5603, + 0x51d0, 0xd4d7, 0x51b1, 0xd44c, 0x535b, 0xd469, 0xd1be, 0xd5a8); + // -47.9375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd1fe); + // 6.19335365, -81.26284790, + // -77.74858093, 15.38204670, 27.37081337, 48.81098938, + // -82.18785095, 3.87765026, -34.03960037, + // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, + // -44.62148285, 83.52678680 + VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + 0x42433e74, 0xc2a4602e, 0x40782b6c, 
0xc208288d, 0xc2b8b19e, + 0x423bf046, 0x41e0c038, 0x4269cada, 0x4264328a, 0xc2327c66, + 0x42a70db7); + asm volatile("vfwnmacc.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 4080.47851562, -285.94589233, -3844.13818359, -1801.05395508, + // -2328.37084961, 668.00445557, 1327.81384277, 4607.11083984, + // 2263.13330078, -3619.81323242, 2135.66967773, + // -3323.79687500, 2762.37426758, -3439.63916016, + // -2157.50732422, -4421.87060547 + VCMP_U32(5, v8, 0x457f07a8, 0xc38ef913, 0xc5704236, 0xc4e121ba, 0xc51185ef, + 0x44270049, 0x44a5fa0b, 0x458ff8e3, 0x450d7222, 0xc5623d03, + 0x45057ab7, 0xc54fbcc0, 0x452ca5fd, 0xc556fa3a, 0xc506d81e, + 0xc58a2ef7); + + VSET(16, e32, m4); + double dscalar_32; + // 415907.75000000, 16644.92773438, -320087.15625000, + // -560497.81250000, 51200.66406250, 175961.67187500, + // -62272.61328125, -40134.65234375, 67972.27343750, + // 832511.06250000, -279323.15625000, -48243.37500000, + // 685093.43750000, 272952.25000000, 518086.00000000, + // -349626.18750000 + VLOAD_32(v8, 0x48cb1478, 0x468209db, 0xc89c4ae5, 0xc908d71d, 0x474800aa, + 0x482bd66b, 0xc773409d, 0xc71cc6a7, 0x4784c223, 0x494b3ff1, + 0xc8886365, 0xc73c7360, 0x49274257, 0x48854708, 0x48fcf8c0, + 0xc8aab746); + // -648299.93750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc91e46bf); + // -3761.4446916116867214, 251037.4171459318604320, + // -832277.7590174797223881, -817938.7112226528115571, + // -640813.8540152770001441, -87111.6097838475834578, + // -748981.3983645441476256, 259451.7965630525723100, + // -469164.5467169298790395, -204901.8221613015048206, + // 97767.4262132318690419, -208046.9794710964197293, + // -303699.6372622016351670, -710697.5104083393234760, + // -907884.7086961114546284, -326406.2730544115183875 + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, + 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, + 0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, + 
0xc112894e8c8e766c, 0xc125b05305543dea, 0xc12bb4d96ada377b, + 0xc113ec19179b935e); + asm volatile("vfwnmacc.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 269632972092.2103271484375000, 10790654572.4701824188232422, + // -207511651113.6687316894531250, + // -363369878873.9254760742187500, 33194028125.5312614440917969, + // 114076027990.5677947998046875, -40370582316.7976837158203125, + // -26019552057.8339157104492188, 44066889785.8108749389648438, + // 539717074688.6307373046875000, + // -181085282506.6039428710937500, + // -31275968950.3095932006835938, 444146336412.5474243164062500, + // 176955637312.9947814941406250, 335876029304.3336791992187500, + // -226662309098.3402404785156250 + VCMP_U64(6, v16, 0x424f63b0529e1aec, 0x420419629363c2ef, 0xc24828544194d599, + 0xc25526a215567b3b, 0x421eea12b1762003, 0x423a8f760c56915b, + 0xc222cc8c6659986a, 0xc2183b8b6ce755ee, 0x4224852ec0739f2b, + 0x425f6a693fc0285e, 0xc24514c310654d4e, 0xc21d20c5a6d93d06, + 0x4259da4bd0a72309, 0x424499b05f207f55, 0x42538cf15ede155b, + 0xc24a63102e752b8d); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 85.2500, -7.6602, -81.8125, -37.2500, + // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, + // -77.4375, 45.5312, -68.7500, 58.8438, -70.5625, -45.9375, + // -90.5000 + VLOAD_16(v4, 0x5554, 0xc7a9, 0xd51d, 0xd0a8, 0xd200, 0x4b7a, 0x4e7f, 0x5603, + 0x51d0, 0xd4d7, 0x51b1, 0xd44c, 0x535b, 0xd469, 0xd1be, 0xd5a8); + // -47.9375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd1fe); + VLOAD_8(v0, 0xAA, 0xAA); + // 6.19335365, -81.26284790, + // -77.74858093, 15.38204670, 27.37081337, 48.81098938, + // -82.18785095, 3.87765026, -34.03960037, + // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, + // -44.62148285, 83.52678680 + VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + 0x42433e74, 0xc2a4602e, 0x40782b6c, 0xc208288d, 0xc2b8b19e, + 0x423bf046, 0x41e0c038, 0x4269cada, 
0x4264328a, 0xc2327c66, + 0x42a70db7); + asm volatile("vfwnmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 6.19335365, -285.94589233, -77.74858093, + // -1801.05395508, 27.37081337, 668.00445557, -82.18785095, + // 4607.11083984, -34.03960037, -3619.81323242, 46.98464203, + // -3323.79687500, 58.44809723, -3439.63916016, -44.62148285, + // -4421.87060547 + VCMP_U32(7, v8, 0x40c62ff4, 0xc38ef913, 0xc29b7f46, 0xc4e121ba, 0x41daf76d, + 0x44270049, 0xc2a4602e, 0x458ff8e3, 0xc208288d, 0xc5623d03, + 0x423bf046, 0xc54fbcc0, 0x4269cada, 0xc556fa3a, 0xc2327c66, + 0xc58a2ef7); + + VSET(16, e32, m4); + double dscalar_32; + // 415907.75000000, 16644.92773438, -320087.15625000, + // -560497.81250000, 51200.66406250, 175961.67187500, + // -62272.61328125, -40134.65234375, 67972.27343750, + // 832511.06250000, -279323.15625000, -48243.37500000, + // 685093.43750000, 272952.25000000, 518086.00000000, + // -349626.18750000 + VLOAD_32(v8, 0x48cb1478, 0x468209db, 0xc89c4ae5, 0xc908d71d, 0x474800aa, + 0x482bd66b, 0xc773409d, 0xc71cc6a7, 0x4784c223, 0x494b3ff1, + 0xc8886365, 0xc73c7360, 0x49274257, 0x48854708, 0x48fcf8c0, + 0xc8aab746); + // -648299.93750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc91e46bf); + VLOAD_8(v0, 0xAA, 0xAA); + // -3761.4446916116867214, 251037.4171459318604320, + // -832277.7590174797223881, -817938.7112226528115571, + // -640813.8540152770001441, -87111.6097838475834578, + // -748981.3983645441476256, 259451.7965630525723100, + // -469164.5467169298790395, -204901.8221613015048206, + // 97767.4262132318690419, -208046.9794710964197293, + // -303699.6372622016351670, -710697.5104083393234760, + // -907884.7086961114546284, -326406.2730544115183875 + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, + 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, + 0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, + 0xc112894e8c8e766c, 0xc125b05305543dea, 
0xc12bb4d96ada377b, + 0xc113ec19179b935e); + asm volatile("vfwnmacc.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -3761.4446916116867214, 10790654572.4701824188232422, + // -832277.7590174797223881, -363369878873.9254760742187500, + // -640813.8540152770001441, 114076027990.5677947998046875, + // -748981.3983645441476256, -26019552057.8339157104492188, + // -469164.5467169298790395, 539717074688.6307373046875000, + // 97767.4262132318690419, -31275968950.3095932006835938, + // -303699.6372622016351670, 176955637312.9947814941406250, + // -907884.7086961114546284, -226662309098.3402404785156250 + VCMP_U64(8, v16, 0xc0ad62e3ae9e7200, 0x420419629363c2ef, 0xc129662b849df069, + 0xc25526a215567b3b, 0xc1238e5bb5417d8a, 0x423a8f760c56915b, + 0xc126db6acbf67002, 0xc2183b8b6ce755ee, 0xc11ca2b22fd69018, + 0x425f6a693fc0285e, 0x40f7de76d1c4f740, 0xc21d20c5a6d93d06, + 0xc112894e8c8e766c, 0x424499b05f207f55, 0xc12bb4d96ada377b, + 0xc24a63102e752b8d); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c new file mode 100644 index 000000000..9bff05119 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c @@ -0,0 +1,347 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, + // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, + // -42.7812, 97.2500, -83.6250, 46.6250 + VLOAD_16(v4, 0xcecb, 0x4c16, 0xd4c2, 0x549c, 0x50e8, 0x501b, 0xd201, 0xd3dd, + 0xd28a, 0x525c, 0xd005, 0xd566, 0xd159, 0x5614, 0xd53a, 0x51d4); + // -18.1719, -46.5312, -72.7500, + // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, + // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, + // -44.4688, 42.5938 + VLOAD_16(v12, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); + // 69.72727966, 14.41778183, + // -64.82620239, 5.66590357, 73.33881378, + // -23.97786140, 94.91672516, 17.38204765, -39.07393646, + // -50.71182251, -11.98221493, -36.07648849, + // -86.86090088, 55.96418381, 61.43484116, -88.02533722 + VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, + 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, + 0xc2b00cf9); + asm volatile("vfwnmsac.vv v8, v4, v12"); + // -424.03662109, 774.91290283, -5602.91992188, 5762.77539062, + // -465.73541260, -231.43232727, 1013.51440430, 1483.88403320, + // 3750.31274414, 2659.97167969, -537.53594971, -8247.09960938, + // -4204.55615234, -956.80340576, -3657.26440430, -2073.95898438 + VCMP_U32(1, v8, 0xc3d404b0, 0x4441ba6d, 0xc5af175c, 0x45b41634, 0xc3e8de22, + 0xc3676ead, 0x447d60ec, 0x44b97c4a, 0x456a6501, 0x45263f8c, + 0xc406624d, 0xc600dc66, 0xc5836473, 0xc46f336b, 0xc564943b, + 0xc5019f58); + + VSET(16, e32, m4); + // 76109.13281250, 56176.41406250, -69127.14843750, + // -80327.49218750, 42920.59375000, -22857.18164062, 
+ // -74227.70312500, -2650.23828125, 34254.71093750, + // -45853.78125000, 16339.80859375, + // -48032.71875000, 49.54582977, -47754.19921875, + // -95663.35156250, 82512.11718750 + VLOAD_32(v8, 0x4794a691, 0x475b706a, 0xc7870393, 0xc79ce3bf, 0x4727a898, + 0xc6b2925d, 0xc790f9da, 0xc525a3d0, 0x4705ceb6, 0xc7331dc8, + 0x467f4f3c, 0xc73ba0b8, 0x42462eee, 0xc73a8a33, 0xc7bad7ad, + 0x47a1280f); + // -36622.54296875, -60900.32421875, -36611.69921875, + // -74411.05468750, -25865.60937500, -67159.76562500, + // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, + // 10443.93554688, -6054.45410156, -26090.46093750, + // 83534.57031250, 49878.42968750, -62082.53125000 + VLOAD_32(v24, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, + 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, + 0xc7728288); + // 69521.3925020728202071, 98263.6759213360201102, + // -97991.2678309752518544, -63510.9471883209771477, + // 65329.9928102507547010, -34993.7523106171429390, + // -15831.2510480509663466, -3510.3868967669695849, + // 47068.9415519913600292, -19802.3942476644588169, + // 25915.8242703938303748, 82619.8738822988234460, + // 36865.7501246419560630, 41236.4660055586136878, + // -5735.0030344506667461, 97965.1847665070963558 + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, + 0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, + 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, + 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, + 0x40f7ead2f4cdb996); + asm volatile("vfwnmsac.vv v16, v8, v24"); + // 2787379508.1325840950012207, 3421260093.5289182662963867, + // -2530960357.7114648818969727, -5977316925.0209798812866211, + // 1110232642.0733766555786133, -1535117955.5847384929656982, + // 456145661.6082201600074768, -83815261.7664972990751266, + // 
2396598705.6646966934204102, 21482945.4617780297994614, + // -170625991.9771288335323334, -290729271.1712532043457031, + // 1329539.2864317980129272, 3989167748.8218097686767578, + // 4771532019.5777149200439453, 5122659058.9813976287841797 + VCMP_U64(2, v16, 0x41e4c48126843e21, 0x41e97d8927b0ece6, 0xc1e2db6c7cb6c452, + 0xc1f64469e3d055ef, 0x41d08b339084b234, 0xc1d6e002a0e56c5b, + 0x41bb303afd9bb451, 0xc193fbad7710e4ab, 0x41e1db2636354532, + 0x41747cdc1763715c, 0xc1a457178ff44a3b, 0xc1b1542d372bd740, + 0x4134498349539825, 0x41edb8bbd09a4c44, 0x41f1c67ccf393e52, + 0x41f315592f2fb3ce); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, + // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, + // -42.7812, 97.2500, -83.6250, 46.6250 + VLOAD_16(v4, 0xcecb, 0x4c16, 0xd4c2, 0x549c, 0x50e8, 0x501b, 0xd201, 0xd3dd, + 0xd28a, 0x525c, 0xd005, 0xd566, 0xd159, 0x5614, 0xd53a, 0x51d4); + // -18.1719, -46.5312, -72.7500, + // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, + // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, + // -44.4688, 42.5938 + VLOAD_16(v12, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); + VLOAD_8(v0, 0xAA, 0xAA); + // 69.72727966, 14.41778183, + // -64.82620239, 5.66590357, 73.33881378, + // -23.97786140, 94.91672516, 17.38204765, -39.07393646, + // -50.71182251, -11.98221493, -36.07648849, + // -86.86090088, 55.96418381, 61.43484116, -88.02533722 + VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, + 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, + 0xc2b00cf9); + asm volatile("vfwnmsac.vv v8, v4, v12, v0.t"); + // 69.72727966, 774.91290283, -64.82620239, + // 5762.77539062, 73.33881378, -231.43232727, 
94.91672516, + // 1483.88403320, -39.07393646, 2659.97167969, -11.98221493, + // -8247.09960938, -86.86090088, -956.80340576, 61.43484116, + // -2073.95898438 + VCMP_U32(3, v8, 0x428b745e, 0x4441ba6d, 0xc281a704, 0x45b41634, 0x4292ad79, + 0xc3676ead, 0x42bdd55d, 0x44b97c4a, 0xc21c4bb6, 0x45263f8c, + 0xc13fb727, 0xc600dc66, 0xc2adb8c8, 0xc46f336b, 0x4275bd47, + 0xc5019f58); + + VSET(16, e32, m4); + // 76109.13281250, 56176.41406250, -69127.14843750, + // -80327.49218750, 42920.59375000, -22857.18164062, + // -74227.70312500, -2650.23828125, 34254.71093750, + // -45853.78125000, 16339.80859375, + // -48032.71875000, 49.54582977, -47754.19921875, + // -95663.35156250, 82512.11718750 + VLOAD_32(v8, 0x4794a691, 0x475b706a, 0xc7870393, 0xc79ce3bf, 0x4727a898, + 0xc6b2925d, 0xc790f9da, 0xc525a3d0, 0x4705ceb6, 0xc7331dc8, + 0x467f4f3c, 0xc73ba0b8, 0x42462eee, 0xc73a8a33, 0xc7bad7ad, + 0x47a1280f); + // -36622.54296875, -60900.32421875, -36611.69921875, + // -74411.05468750, -25865.60937500, -67159.76562500, + // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, + // 10443.93554688, -6054.45410156, -26090.46093750, + // 83534.57031250, 49878.42968750, -62082.53125000 + VLOAD_32(v24, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, + 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, + 0xc7728288); + VLOAD_8(v0, 0xAA, 0xAA); + // 69521.3925020728202071, 98263.6759213360201102, + // -97991.2678309752518544, -63510.9471883209771477, + // 65329.9928102507547010, -34993.7523106171429390, + // -15831.2510480509663466, -3510.3868967669695849, + // 47068.9415519913600292, -19802.3942476644588169, + // 25915.8242703938303748, 82619.8738822988234460, + // 36865.7501246419560630, 41236.4660055586136878, + // -5735.0030344506667461, 97965.1847665070963558 + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, + 
0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, + 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, + 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, + 0x40f7ead2f4cdb996); + asm volatile("vfwnmsac.vv v16, v8, v24, v0.t"); + // 69521.3925020728202071, 3421260093.5289182662963867, + // -97991.2678309752518544, -5977316925.0209798812866211, + // 65329.9928102507547010, -1535117955.5847384929656982, + // -15831.2510480509663466, -83815261.7664972990751266, + // 47068.9415519913600292, 21482945.4617780297994614, + // 25915.8242703938303748, -290729271.1712532043457031, + // 36865.7501246419560630, 3989167748.8218097686767578, + // -5735.0030344506667461, 5122659058.9813976287841797 + VCMP_U64(4, v16, 0x40f0f91647b040e6, 0x41e97d8927b0ece6, 0xc0f7ec74490921f9, + 0xc1f64469e3d055ef, 0x40efe63fc51a00c4, 0xc1d6e002a0e56c5b, + 0xc0ceeba02257b050, 0xc193fbad7710e4ab, 0x40e6fb9e2131a44c, + 0x41747cdc1763715c, 0x40d94ef4c0d89c24, 0xc1b1542d372bd740, + 0x40e20038010564a4, 0x41edb8bbd09a4c44, 0xc0b66700c6dda260, + 0x41f315592f2fb3ce); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, + // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, + // -7.6758, -25.1562, 64.8125, 28.0156, -51.9688 + VLOAD_16(v4, 0x4b97, 0xd331, 0xd0ff, 0x55fd, 0x4d99, 0xcf8f, 0x53ab, 0x5432, + 0xd50a, 0xcd2c, 0xd044, 0xc7ad, 0xce4a, 0x540d, 0x4f01, 0xd27f); + // -27.7344 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xceef); + // -90.47762299, -89.97399139, -34.20752716, + // -93.73470306, 81.75606537, 80.60296631, 73.45400238, + // -61.63031769, -55.39078903, 21.99703789, 29.49930191, + // -64.56553650, -17.54965782, 84.51310730, -88.96613312, + // -6.75917578 + VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, + 0x41ebfe92, 0xc281218e, 0xc18c65b3, 0x42a906b6, 
0xc2b1eea9, + 0xc0d84b2b); + asm volatile("vfwnmsac.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 330.52151489, -1685.56726074, -1142.71582031, 2563.56518555, + // 702.74603271, -757.92852783, 1774.78454590, 1800.03955078, + // -2291.47485352, -551.75787354, -916.93621826, -277.44854736, + // -715.24255371, 1882.04724121, 688.02972412, -1448.07995605 + VCMP_U32(5, v8, 0x43a542c1, 0xc4d2b227, 0xc48ed6e8, 0x4520390b, 0x442fafbf, + 0xc43d7b6d, 0x44ddd91b, 0x44e10144, 0xc50f3799, 0xc409f081, + 0xc4653beb, 0xc38ab96a, 0xc432cf86, 0x44eb4183, 0x442c01e7, + 0xc4b5028f); + + VSET(16, e32, m4); + double dscalar_32; + // 467373.87500000, -160965.29687500, 883060.25000000, + // -737665.37500000, -482502.81250000, -983579.31250000, + // -407525.09375000, 564889.31250000, -121145.03125000, + // 744798.75000000, 160985.04687500, -9122.68847656, + // -708214.37500000, 763142.93750000, -340832.59375000, + // -663023.75000000 + VLOAD_32(v8, 0x48e435bc, 0xc81d3153, 0x49579744, 0xc9341816, 0xc8eb98da, + 0xc97021b5, 0xc8c6fca3, 0x4909e995, 0xc7ec9c84, 0x4935d5ec, + 0x481d3643, 0xc60e8ac1, 0xc92ce766, 0x493a506f, 0xc8a66c13, + 0xc921defc); + // 235169.78125000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4865a872); + // -460724.6105727200629190, -944938.6498861069558188, + // -303510.4811713555827737, -748025.6652074699522927, + // 387702.0000469267833978, -894167.6638924945145845, + // 98379.0701996718998998, -950753.1128427713410929, + // -749333.7338243273552507, 898522.0366696736309677, + // -388606.5700500296661630, 47697.1169114386430010, + // -665347.3327810273040086, 976438.6193965608254075, + // -498588.0437998892739415, 793291.0511387982405722 + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, + 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, + 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, + 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, + 0x412835961a2edd54); 
+ asm volatile("vfwnmsac.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // -109912672670.3254089355468750, 37853228716.2851715087890625, + // -207669389333.5514831542968750, 173475856848.7839965820312500, + // 113470468570.1348114013671875, 231307237594.9865112304687500, + // 95837685530.1434478759765625, -132845846804.2007293701171875, + // 28488901164.8530883789062500, -175153260590.7367553710937500, + // -37859206864.6847991943359375, 2145428350.5620102882385254, + // 166549954299.5226745605468750, -179467181235.7380371093750000, + // 80153027927.0138244628906250, 155923943542.1058349609375000 + VCMP_U64(6, v16, 0xc239974e499e534e, 0x4221a074dd589202, 0xc2482d07b40ac697, + 0x424431fbc0e8645a, 0x423a6b5df1da2283, 0x424aed7e2c6d7e46, + 0x4236505f071a24b9, 0xc23eee3ac1143363, 0x421a884888b36990, + 0xc24463f954175e4e, 0xc221a12b4da15e9e, 0x41dff828dfa3f7fa, + 0x424363934f7dc2e7, 0xc244e489ee59de78, 0x4232a97e2557038a, + 0x424226e5483b0d8c); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, + // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, + // -7.6758, -25.1562, 64.8125, 28.0156, -51.9688 + VLOAD_16(v4, 0x4b97, 0xd331, 0xd0ff, 0x55fd, 0x4d99, 0xcf8f, 0x53ab, 0x5432, + 0xd50a, 0xcd2c, 0xd044, 0xc7ad, 0xce4a, 0x540d, 0x4f01, 0xd27f); + // -27.7344 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xceef); + VLOAD_8(v0, 0xAA, 0xAA); + // -90.47762299, -89.97399139, -34.20752716, + // -93.73470306, 81.75606537, 80.60296631, 73.45400238, + // -61.63031769, -55.39078903, 21.99703789, 29.49930191, + // -64.56553650, -17.54965782, 84.51310730, -88.96613312, + // -6.75917578 + VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, + 0x41ebfe92, 0xc281218e, 0xc18c65b3, 0x42a906b6, 0xc2b1eea9, + 0xc0d84b2b); + asm volatile("vfwnmsac.vf v8, %[A], v4, v0.t" ::[A] 
"f"(dscalar_16)); + // -90.47762299, -1685.56726074, -34.20752716, + // 2563.56518555, 81.75606537, -757.92852783, 73.45400238, + // 1800.03955078, -55.39078903, -551.75787354, 29.49930191, + // -277.44854736, -17.54965782, 1882.04724121, -88.96613312, + // -1448.07995605 + VCMP_U32(7, v8, 0xc2b4f48b, 0xc4d2b227, 0xc208d482, 0x4520390b, 0x42a3831b, + 0xc43d7b6d, 0x4292e873, 0x44e10144, 0xc25d902b, 0xc409f081, + 0x41ebfe92, 0xc38ab96a, 0xc18c65b3, 0x44eb4183, 0xc2b1eea9, + 0xc4b5028f); + + VSET(16, e32, m4); + double dscalar_32; + // 467373.87500000, -160965.29687500, 883060.25000000, + // -737665.37500000, -482502.81250000, -983579.31250000, + // -407525.09375000, 564889.31250000, -121145.03125000, + // 744798.75000000, 160985.04687500, -9122.68847656, + // -708214.37500000, 763142.93750000, -340832.59375000, + // -663023.75000000 + VLOAD_32(v8, 0x48e435bc, 0xc81d3153, 0x49579744, 0xc9341816, 0xc8eb98da, + 0xc97021b5, 0xc8c6fca3, 0x4909e995, 0xc7ec9c84, 0x4935d5ec, + 0x481d3643, 0xc60e8ac1, 0xc92ce766, 0x493a506f, 0xc8a66c13, + 0xc921defc); + // 235169.78125000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4865a872); + VLOAD_8(v0, 0xAA, 0xAA); + // -460724.6105727200629190, -944938.6498861069558188, + // -303510.4811713555827737, -748025.6652074699522927, + // 387702.0000469267833978, -894167.6638924945145845, + // 98379.0701996718998998, -950753.1128427713410929, + // -749333.7338243273552507, 898522.0366696736309677, + // -388606.5700500296661630, 47697.1169114386430010, + // -665347.3327810273040086, 976438.6193965608254075, + // -498588.0437998892739415, 793291.0511387982405722 + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, + 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, + 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, + 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, + 0x412835961a2edd54); + asm volatile("vfwnmsac.vf v16, %[A], v8, v0.t" 
::[A] "f"(dscalar_32)); + // -460724.6105727200629190, 37853228716.2851715087890625, + // -303510.4811713555827737, 173475856848.7839965820312500, + // 387702.0000469267833978, 231307237594.9865112304687500, + // 98379.0701996718998998, -132845846804.2007293701171875, + // -749333.7338243273552507, -175153260590.7367553710937500, + // -388606.5700500296661630, 2145428350.5620102882385254, + // -665347.3327810273040086, -179467181235.7380371093750000, + // -498588.0437998892739415, 155923943542.1058349609375000 + VCMP_U64(8, v16, 0xc11c1ed27139f9a2, 0x4221a074dd589202, 0xc1128659ecb82f10, + 0x424431fbc0e8645a, 0x4117a9d8000c4d34, 0x424aed7e2c6d7e46, + 0x40f804b11f89b0f0, 0xc23eee3ac1143363, 0xc126de2b77b7d27e, + 0xc24463f954175e4e, 0xc117b7fa47bb31ea, 0x41dff828dfa3f7fa, + 0xc1244e06aa62465a, 0xc244e489ee59de78, 0xc11e6e702cd9e0d0, + 0x424226e5483b0d8c); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c new file mode 100644 index 000000000..6e337252c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c @@ -0,0 +1,268 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Xiaorui Yin
+// Date: 2022/05/03
+
+#include "float_macros.h"
+#include "vector_macros.h"
+
+// Naive test: widen-and-sum sixteen fp16 values onto the fp32 seed in v4[0]
+void TEST_CASE1(void) {
+  VSET(1, e32, m4);
+  VLOAD_32(v4, 0x3F800000); // seed: 1.0f
+  VSET(16, e16, m2);
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  asm volatile("vfwredosum.vs v8, v12, v4");
+  VCMP_U32(1, v8, 0x42920000); // 1 + 72 = 73.0f
+}
+
+// Unmasked fp32 -> fp64 sum, then masked fp16 -> fp32 and fp32 -> fp64 sums
+void TEST_CASE2(void) {
+  VSET(1, e64, m8);
+  VLOAD_64(v8, 0x3FF0000000000000); // seed: 1.0
+  VSET(16, e32, m4);
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  asm volatile("vfwredosum.vs v16, v24, v8");
+  VCMP_U64(2, v16, 0x4052400000000000); // 1 + 72 = 73.0
+
+  VSET(1, e32, m4);
+  VLOAD_32(v4, 0x3F800000); // seed: 1.0f
+  VSET(2, e8, m1);
+  VLOAD_8(v0, 0xaa, 0x55); // active: elements 1,3,5,7,8,10,12,14
+  VSET(16, e16, m2);
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  asm volatile("vfwredosum.vs v8, v12, v4, v0.t");
+  VCMP_U32(3, v8, 0x42140000); // 1 + 20 + 16 = 37.0f
+
+  VSET(1, e64, m8);
+  VLOAD_64(v8, 0x3FF0000000000000); // seed: 1.0
+  VSET(2, e8, m1);
+  VLOAD_8(v0, 0xaa, 0x55); // same mask as the fp16 check above
+  VSET(16, e32, m4);
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  asm volatile("vfwredosum.vs v16, v24, v8, v0.t");
+  VCMP_U64(4, v16, 0x4042800000000000); // 1 + 20 + 16 = 37.0
+}
+
+// Are we respecting the undisturbed tail policy?
+void TEST_CASE3(void) {
+  VSET(16, e16, m2);
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  asm volatile("vfwredosum.vs v8, v12, v4"); // expect only v8[0] to change
+  VCMP_U32(5, v8, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8"); // expect only v16[0] to change
+  VCMP_U64(6, v16, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+}
+
+// Odd vl, undisturbed tail. NOTE(review): tail values come from TEST_CASE3?
+void TEST_CASE4(void) {
+  VSET(1, e32, m4); // vl = 1: expect v16[0] = 1.0 + 1.0 = 2.0
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8");
+  VCMP_U64(7, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+
+  VSET(3, e32, m4); // vl = 3: expect v16[0] = 1.0 + (1 + 2 + 3) = 7.0
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8");
+  VCMP_U64(8, v16, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+
+  VSET(7, e32, m4); // vl = 7: expect v16[0] = 1.0 + 28.0 = 29.0
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8");
+  VCMP_U64(9, v16, 0x403d000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+
+  VSET(15, e32, m4); // vl = 15: expect v16[0] = 1.0 + 36.0 + 28.0 = 65.0
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8");
+  VCMP_U64(10, v16, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+}
+
+// Odd number of elements, undisturbed policy, and mask
+void TEST_CASE5(void) {
+  VSET(7, e16, m2);
+  VLOAD_8(v0, 0x00, 0xff); // elements 0..6 masked off: expect v8 unchanged
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  asm volatile("vfwredosum.vs v8, v12, v4, v0.t");
+  VCMP_U32(11, v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+
+  VSET(1, e32, m4);
+  VLOAD_8(v0, 0xff, 0x00); // element 0 active: expect v16[0] = 1.0 + 1.0 = 2.0
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+  asm volatile("vfwredosum.vs v16, v24, v8, v0.t");
+  VCMP_U64(12, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000,
+           0x4010000000000000, 0x4014000000000000, 0x4018000000000000,
+           0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000,
+           0x4000000000000000, 0x4008000000000000, 0x4010000000000000,
+           0x4014000000000000, 0x4018000000000000, 0x401C000000000000,
+           0x4020000000000000);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c
new file mode 100644
index 000000000..e6eeb6405
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c
@@ -0,0 +1,272 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Xiaorui Yin
+// Date: 2022/05/03
+
+#include "float_macros.h"
+#include "vector_macros.h"
+
+// Naive test
+void TEST_CASE1(void) {
+  // WARNING: setting vl == 1 is mandatory here since
+  // these variables are initialized on the stack, which is
+  // immediately before the UART.
+  // Loading more values would load from the UART
+  // addr space!!!!
+  VSET(1, e32, m4);
+  VLOAD_32(v4, 0x3F800000); // seed: 1.0f
+  VSET(16, e16, m2);
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  asm volatile("vfwredsum.vs v8, v12, v4"); // NOTE(review): pre-1.0 mnemonic?
+  VCMP_U32(1, v8, 0x42920000); // 1 + 72 = 73.0f
+}
+
+// Unmasked fp32 -> fp64 sum, then masked fp16 -> fp32 and fp32 -> fp64 sums
+void TEST_CASE2(void) {
+  VSET(1, e64, m8);
+  VLOAD_64(v8, 0x3FF0000000000000); // seed: 1.0
+  VSET(16, e32, m4);
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  asm volatile("vfwredsum.vs v16, v24, v8");
+  VCMP_U64(2, v16, 0x4052400000000000); // 1 + 72 = 73.0
+
+  VSET(1, e32, m4);
+  VLOAD_32(v4, 0x3F800000); // seed: 1.0f
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0xaa, 0x55); // active: elements 1,3,5,7,8,10,12,14
+  // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
+  VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800,
+           0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800);
+  asm volatile("vfwredsum.vs v8, v12, v4, v0.t");
+  VCMP_U32(3, v8, 0x42140000); // 1 + 20 + 16 = 37.0f
+
+  VSET(1, e64, m8);
+  VLOAD_64(v16, 0x3FF0000000000000); // seed 1.0 in v16[0], loaded with vl == 1
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0xaa, 0x55); // same mask as the fp16 check above
+  VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+           0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000,
+           0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000,
+           0x41000000);
+  // Do not reload the one-element seed with vl == 16: it reads past the array
+  asm volatile("vfwredsum.vs v8, v24, v16, v0.t");
+  VCMP_U64(4, v8, 0x4042800000000000); // 1 + 20 + 16 = 37.0
+}
+
+// Are we respecting the undisturbed tail policy?
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredsum.vs v8, v12, v4"); + VCMP_U32(5, v8, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(6, v16, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(7, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(8, v16, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(9, v16, 
0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(10, v16, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 
0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(11, v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8, v0.t"); + VCMP_U64(12, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 
0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c new file mode 100644 index 000000000..e744d9ced --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c @@ -0,0 +1,527 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, + // -95.5000, 29.5938, -41.4062, -94.0000, 34.3438, + // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 + VLOAD_16(v2, 0xcbc8, 0x55fc, 0xd14e, 0x4fae, 0xd258, 0xd5a4, 0xd5f8, 0x4f66, + 0xd12d, 0xd5e0, 0x504b, 0xd459, 0x4fe4, 0xd4b1, 0x51c8, 0xd3f6); + // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, + // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, + // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 + VLOAD_16(v4, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); + asm volatile("vfwsub.vv v8, v2, v4"); + // -72.81250000, 52.46875000, 6.96875000, 84.28125000, 4.03125000, + // -78.10937500, -187.68750000, -37.59375000, -21.64062500, + // -52.71875000, -63.71875000, + // -27.65625000, 21.39062500, 9.56250000, 53.35156250, + // -126.56250000 + VCMP_U32(1, v8, 0xc291a000, 0x4251e000, 0x40df0000, 0x42a89000, 0x40810000, + 0xc29c3800, 0xc33bb000, 0xc2166000, 0xc1ad2000, 0xc252e000, + 0xc27ee000, 0xc1dd4000, 0x41ab2000, 0x41190000, 0x42556800, + 0xc2fd2000); + + VSET(16, e32, 
m4); + // 74632.77343750, -65636.60937500, 16165.84765625, + // -17815.85937500, -85604.03125000, -76754.03125000, + // 21778.01171875, -70512.52343750, 85301.90625000, + // -8385.11035156, 98258.05468750, -50421.53125000, + // 69842.53906250, -65219.96093750, -65266.08984375, + // -90740.60156250 + VLOAD_32(v4, 0x4791c463, 0xc780324e, 0x467c9764, 0xc68b2fb8, 0xc7a73204, + 0xc795e904, 0x46aa2406, 0xc789b843, 0x47a69af4, 0xc6030471, + 0x47bfe907, 0xc744f588, 0x47886945, 0xc77ec3f6, 0xc77ef217, + 0xc7b13a4d); + // 5391.72216797, -90760.36718750, -22961.19531250, + // 12708.62500000, 87107.59375000, 54867.48437500, + // 55424.39453125, -71436.00781250, -61505.46484375, + // 57701.78906250, -81581.38281250, 53319.19531250, + // -86229.57031250, 44376.69531250, 46809.38671875, + // -92887.27343750 + VLOAD_32(v8, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, + 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, + 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, + 0xc7b56ba3); + asm volatile("vfwsub.vv v16, v4, v8"); + // 69241.0512695312500000, 25123.7578125000000000, + // 39127.0429687500000000, -30524.4843750000000000, + // -172711.6250000000000000, -131621.5156250000000000, + // -33646.3828125000000000, 923.4843750000000000, + // 146807.3710937500000000, -66086.8994140625000000, + // 179839.4375000000000000, -103740.7265625000000000, + // 156072.1093750000000000, -109596.6562500000000000, + // -112075.4765625000000000, 2146.6718750000000000 + VCMP_U64(2, v16, 0x40f0e790d2000000, 0x40d888f080000000, 0x40e31ae160000000, + 0xc0ddcf1f00000000, 0xc105153d00000000, 0xc100112c20000000, + 0xc0e06dcc40000000, 0x408cdbe000000000, 0x4101ebbaf8000000, + 0xc0f0226e64000000, 0x4105f3fb80000000, 0xc0f953cba0000000, + 0x41030d40e0000000, 0xc0fac1ca80000000, 0xc0fb5cb7a0000000, + 0x40a0c55800000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 
-15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, + // -95.5000, 29.5938, -41.4062, -94.0000, 34.3438, + // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 + VLOAD_16(v4, 0xcbc8, 0x55fc, 0xd14e, 0x4fae, 0xd258, 0xd5a4, 0xd5f8, 0x4f66, + 0xd12d, 0xd5e0, 0x504b, 0xd459, 0x4fe4, 0xd4b1, 0x51c8, 0xd3f6); + // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, + // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, + // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 + VLOAD_16(v8, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwsub.vv v12, v4, v8, v0.t"); + // 0.00000000, 52.46875000, 0.00000000, 84.28125000, + // 0.00000000, -78.10937500, 0.00000000, -37.59375000, + // 0.00000000, -52.71875000, 0.00000000, -27.65625000, + // 0.00000000, 9.56250000, 0.00000000, -126.56250000 + VCMP_U32(3, v12, 0x0, 0x4251e000, 0x0, 0x42a89000, 0x0, 0xc29c3800, 0x0, + 0xc2166000, 0x0, 0xc252e000, 0x0, 0xc1dd4000, 0x0, 0x41190000, 0x0, + 0xc2fd2000); + + VSET(16, e32, m4); + // 74632.77343750, -65636.60937500, 16165.84765625, + // -17815.85937500, -85604.03125000, -76754.03125000, + // 21778.01171875, -70512.52343750, 85301.90625000, + // -8385.11035156, 98258.05468750, -50421.53125000, + // 69842.53906250, -65219.96093750, -65266.08984375, + // -90740.60156250 + VLOAD_32(v8, 0x4791c463, 0xc780324e, 0x467c9764, 0xc68b2fb8, 0xc7a73204, + 0xc795e904, 0x46aa2406, 0xc789b843, 0x47a69af4, 0xc6030471, + 0x47bfe907, 0xc744f588, 0x47886945, 0xc77ec3f6, 0xc77ef217, + 0xc7b13a4d); + // 5391.72216797, -90760.36718750, -22961.19531250, + // 12708.62500000, 87107.59375000, 54867.48437500, + // 55424.39453125, -71436.00781250, -61505.46484375, + // 57701.78906250, -81581.38281250, 53319.19531250, + // -86229.57031250, 44376.69531250, 46809.38671875, + // -92887.27343750 + VLOAD_32(v16, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 
0x47aa21cc, + 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, + 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, + 0xc7b56ba3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwsub.vv v24, v8, v16, v0.t"); + // 0.0000000000000000, 25123.7578125000000000, + // 0.0000000000000000, -30524.4843750000000000, + // 0.0000000000000000, -131621.5156250000000000, + // 0.0000000000000000, 923.4843750000000000, + // 0.0000000000000000, -66086.8994140625000000, + // 0.0000000000000000, -103740.7265625000000000, + // 0.0000000000000000, -109596.6562500000000000, + // 0.0000000000000000, 2146.6718750000000000 + VCMP_U64(4, v24, 0x0, 0x40d888f080000000, 0x0, 0xc0ddcf1f00000000, 0x0, + 0xc100112c20000000, 0x0, 0x408cdbe000000000, 0x0, 0xc0f0226e64000000, + 0x0, 0xc0f953cba0000000, 0x0, 0xc0fac1ca80000000, 0x0, + 0x40a0c55800000000); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 36.4375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x508e); + // 69.8125, -37.3125, -77.2500, 32.7188, + // -83.0000, 76.3125, 14.9375, 72.5000, 39.6250, + // -61.2188, 36.3438, 93.5000, -87.1875, -6.9258, 25.1094, + // -96.8750 + VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 0xd530, 0x54c5, 0x4b78, 0x5488, + 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); + asm volatile("vfwsub.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 33.37500000, -73.75000000, -113.68750000, -3.71875000, + // -119.43750000, 39.87500000, + // -21.50000000, 36.06250000, 3.18750000, -97.65625000, + // -0.09375000, 57.06250000, -123.62500000, -43.36328125, + // -11.32812500, -133.31250000 + VCMP_U32(5, v8, 0x42058000, 0xc2938000, 0xc2e36000, 0xc06e0000, 0xc2eee000, + 0x421f8000, 0xc1ac0000, 0x42104000, 0x404c0000, 0xc2c35000, + 0xbdc00000, 0x42644000, 0xc2f74000, 0xc22d7400, 0xc1354000, + 0xc3055000); + + VSET(16, e32, m4); + double dscalar_32; + // -138614.20312500 + 
BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8075d8d); + // 473107.93750000, 161975.07812500, -173044.89062500, + // -322046.09375000, -485607.56250000, -613808.37500000, + // -182790.53125000, 121114.44531250, -958537.81250000, + // 295217.40625000, 281159.84375000, -735195.87500000, + // -783982.56250000, 420983.65625000, 954426.12500000, + // -297052.53125000 + VLOAD_32(v8, 0x48e7027e, 0x481e2dc5, 0xc828fd39, 0xc89d3fc3, 0xc8ed1cf2, + 0xc915db06, 0xc83281a2, 0x47ec8d39, 0xc96a049d, 0x4890262d, + 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, + 0xc8910b91); + asm volatile("vfwsub.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 611722.1406250000000000, 300589.2812500000000000, + // -34430.6875000000000000, -183431.8906250000000000, + // -346993.3593750000000000, -475194.1718750000000000, + // -44176.3281250000000000, 259728.6484375000000000, + // -819923.6093750000000000, 433831.6093750000000000, + // 419774.0468750000000000, -596581.6718750000000000, + // -645368.3593750000000000, 559597.8593750000000000, + // 1093040.3281250000000000, -158438.3281250000000000 + VCMP_U64(6, v16, 0x4122ab1448000000, 0x411258b520000000, 0xc0e0cfd600000000, + 0xc106643f20000000, 0xc1152dc570000000, 0xc11d00e8b0000000, + 0xc0e5920a80000000, 0x410fb48530000000, 0xc12905a738000000, + 0x411a7a9e70000000, 0x41199ef830000000, 0xc12234cb58000000, + 0xc123b1f0b8000000, 0x412113dbb8000000, 0x4130adb054000000, + 0xc1035732a0000000); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 36.4375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x508e); + // 69.8125, -37.3125, -77.2500, 32.7188, + // -83.0000, 76.3125, 14.9375, 72.5000, 39.6250, + // -61.2188, 36.3438, 93.5000, -87.1875, -6.9258, 25.1094, + // -96.8750 + VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 0xd530, 0x54c5, 0x4b78, 0x5488, + 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + 
asm volatile("vfwsub.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, -73.75000000, 0.00000000, -3.71875000, + // 0.00000000, 39.87500000, 0.00000000, 36.06250000, + // 0.00000000, -97.65625000, 0.00000000, 57.06250000, + // 0.00000000, -43.36328125, 0.00000000, -133.31250000 + VCMP_U32(7, v8, 0x0, 0xc2938000, 0x0, 0xc06e0000, 0x0, 0x421f8000, 0x0, + 0x42104000, 0x0, 0xc2c35000, 0x0, 0x42644000, 0x0, 0xc22d7400, 0x0, + 0xc3055000); + + VSET(16, e32, m4); + double dscalar_32; + // -138614.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8075d8d); + // 473107.93750000, 161975.07812500, -173044.89062500, + // -322046.09375000, -485607.56250000, -613808.37500000, + // -182790.53125000, 121114.44531250, -958537.81250000, + // 295217.40625000, 281159.84375000, -735195.87500000, + // -783982.56250000, 420983.65625000, 954426.12500000, + // -297052.53125000 + VLOAD_32(v8, 0x48e7027e, 0x481e2dc5, 0xc828fd39, 0xc89d3fc3, 0xc8ed1cf2, + 0xc915db06, 0xc83281a2, 0x47ec8d39, 0xc96a049d, 0x4890262d, + 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, + 0xc8910b91); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwsub.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 300589.2812500000000000, + // 0.0000000000000000, -183431.8906250000000000, + // 0.0000000000000000, -475194.1718750000000000, + // 0.0000000000000000, 259728.6484375000000000, + // 0.0000000000000000, 433831.6093750000000000, + // 0.0000000000000000, -596581.6718750000000000, + // 0.0000000000000000, 559597.8593750000000000, + // 0.0000000000000000, -158438.3281250000000000 + VCMP_U64(8, v16, 0x0, 0x411258b520000000, 0x0, 0xc106643f20000000, 0x0, + 0xc11d00e8b0000000, 0x0, 0x410fb48530000000, 0x0, 0x411a7a9e70000000, + 0x0, 0xc12234cb58000000, 0x0, 0x412113dbb8000000, 0x0, + 0xc1035732a0000000); +}; +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -92.15529633, 27.66998672, + // -5.68499708, 
78.95133209, 57.52299500, 15.45270920, 50.26883316, + // 46.63587189, 71.16806793, -80.68485260, + // -22.34193420, 40.17027283, 93.54611969, 25.86016083, 41.82838821, + // 82.50254822 + VLOAD_32(v4, 0xc2b84f83, 0x41dd5c22, 0xc0b5eb7f, 0x429de715, 0x4266178c, + 0x41773e4c, 0x42491349, 0x423a8b22, 0x428e560d, 0xc2a15ea5, + 0xc1b2bc48, 0x4220ae5c, 0x42bb179d, 0x41cee19c, 0x42275045, + 0x42a5014e); + // -72.5625, -83.4375, 28.8281, 33.5938, + // -85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, + // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); + asm volatile("vfwsub.wv v12, v4, v8"); + // -19.59279633, 111.10748291, -34.51312256, 45.35758209, + // 143.27299500, -52.04729080, -40.79366684, + // 138.51086426, 80.42588043, -16.43485260, 36.28306580, + // -10.17347717, 164.04611206, -10.76483917, 36.03541946, + // -4.18495178 + VCMP_U32(9, v12, 0xc19cbe0c, 0x42de3708, 0xc20a0d70, 0x42356e2a, 0x430f45e3, + 0xc250306d, 0xc2232cb7, 0x430a82c8, 0x42a0da0d, 0xc1837a94, + 0x421121dc, 0xc122c690, 0x43240bce, 0xc12c3cc8, 0x42102445, + 0xc085eb20); + + VSET(16, e32, m4); + // -79494.9435096215456724, 81629.4152202270051930, + // 60506.1876363231276628, -81020.4028176319407066, + // -6814.2587861350475578, 11974.4045779409498209, + // 97975.7066144426062237, -93357.8779376419261098, + // 95959.4397212496260181, -58528.4286213813902577, + // 28958.3763895476586185, -36387.3665319164574612, + // -90399.7993234442838002, -78772.1006454367889091, + // -62854.6154750282003079, 37858.6386504948022775 + VLOAD_64(v8, 0xc0f3686f189d8b80, 0x40f3edd6a4bdf6fa, 0x40ed8b46011de3ec, + 0xc0f3c7c671f0e6b7, 0xc0ba9e423fcee2b0, 0x40c76333c935c088, + 0x40f7eb7b4e4af21e, 0xc0f6cade0c085740, 0x40f76d7709192628, + 0xc0ec940db7442fee, 0x40dc479816c42f70, 0xc0e1c46bbaa12444, + 0xc0f611fcca076142, 0xc0f33b419c3e63b8, 0xc0eeb0d3b1f8afb4, + 
0x40e27c546fd32998); + // 95822.63281250, 21789.49804688, -42409.42968750, + // 60172.89062500, -46359.57812500, -71236.33593750, + // 4124.35888672, -80527.00000000, 27430.70507812, + // 39975.67578125, -71197.53125000, -66640.12500000, + // 47459.75390625, -34899.84375000, -21371.85937500, + // 17582.65820312 + VLOAD_32(v16, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, + 0xc78b0ec4, 0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, + 0x46895d51); + asm volatile("vfwsub.wv v24, v8, v16"); + // -175317.5763221215456724, 59839.9171733520051930, + // 102915.6173238231276628, -141193.2934426319552585, + // 39545.3193388649524422, 83210.7405154409498209, + // 93851.3477277238562237, -12830.8779376419261098, + // 68528.7346431246260181, -98504.1044026313902577, + // 100155.9076395476586185, 30252.7584680835425388, + // -137859.5532296942838002, -43872.2568954367889091, + // -41482.7561000282003079, 20275.9804473698022775 + VCMP_U64(10, v24, 0xc10566ac9c4ec5c0, 0x40ed37fd597bedf4, 0x40f92039e08ef1f6, + 0xc1013c4a58f8735c, 0x40e34f2a380623aa, 0x40f450abd926b811, + 0x40f6e9b5904af21e, 0xc0c90f706042ba00, 0x40f0bb0bc1192628, + 0xc0f80c81aba217f7, 0x40f873be85b10bdc, 0x40dd8b308abdb778, + 0xc100d41c6d03b0a1, 0xc0e56c08387cc770, 0xc0e4415831f8afb4, + 0x40d3ccfebfa65330); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -92.15529633, 27.66998672, + // -5.68499708, 78.95133209, 57.52299500, 15.45270920, 50.26883316, + // 46.63587189, 71.16806793, -80.68485260, + // -22.34193420, 40.17027283, 93.54611969, 25.86016083, 41.82838821, + // 82.50254822 + VLOAD_32(v4, 0xc2b84f83, 0x41dd5c22, 0xc0b5eb7f, 0x429de715, 0x4266178c, + 0x41773e4c, 0x42491349, 0x423a8b22, 0x428e560d, 0xc2a15ea5, + 0xc1b2bc48, 0x4220ae5c, 0x42bb179d, 0x41cee19c, 0x42275045, + 0x42a5014e); + // -72.5625, -83.4375, 28.8281, 33.5938, + //
-85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, + // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwsub.wv v12, v4, v8, v0.t"); + // 0.00000000, 111.10748291, 0.00000000, 45.35758209, + // 0.00000000, -52.04729080, 0.00000000, 138.51086426, + // 0.00000000, -16.43485260, 0.00000000, -10.17347717, + // 0.00000000, -10.76483917, 0.00000000, -4.18495178 + VCMP_U32(11, v12, 0x0, 0x42de3708, 0x0, 0x42356e2a, 0x0, 0xc250306d, 0x0, + 0x430a82c8, 0x0, 0xc1837a94, 0x0, 0xc122c690, 0x0, 0xc12c3cc8, 0x0, + 0xc085eb20); + + VSET(16, e32, m4); + // -79494.9435096215456724, 81629.4152202270051930, + // 60506.1876363231276628, -81020.4028176319407066, + // -6814.2587861350475578, 11974.4045779409498209, + // 97975.7066144426062237, -93357.8779376419261098, + // 95959.4397212496260181, -58528.4286213813902577, + // 28958.3763895476586185, -36387.3665319164574612, + // -90399.7993234442838002, -78772.1006454367889091, + // -62854.6154750282003079, 37858.6386504948022775 + VLOAD_64(v8, 0xc0f3686f189d8b80, 0x40f3edd6a4bdf6fa, 0x40ed8b46011de3ec, + 0xc0f3c7c671f0e6b7, 0xc0ba9e423fcee2b0, 0x40c76333c935c088, + 0x40f7eb7b4e4af21e, 0xc0f6cade0c085740, 0x40f76d7709192628, + 0xc0ec940db7442fee, 0x40dc479816c42f70, 0xc0e1c46bbaa12444, + 0xc0f611fcca076142, 0xc0f33b419c3e63b8, 0xc0eeb0d3b1f8afb4, + 0x40e27c546fd32998); + // 95822.63281250, 21789.49804688, -42409.42968750, + // 60172.89062500, -46359.57812500, -71236.33593750, + // 4124.35888672, -80527.00000000, 27430.70507812, + // 39975.67578125, -71197.53125000, -66640.12500000, + // 47459.75390625, -34899.84375000, -21371.85937500, + // 17582.65820312 + VLOAD_32(v16, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, + 0xc78b0ec4, 
0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, + 0x46895d51); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwsub.wv v24, v8, v16, v0.t"); + // 0.0000000000000000, 59839.9171733520051930, + // 0.0000000000000000, -141193.2934426319552585, + // 0.0000000000000000, 83210.7405154409498209, + // 0.0000000000000000, -12830.8779376419261098, + // 0.0000000000000000, -98504.1044026313902577, + // 0.0000000000000000, 30252.7584680835425388, + // 0.0000000000000000, -43872.2568954367889091, + // 0.0000000000000000, 20275.9804473698022775 + VCMP_U64(12, v24, 0x0, 0x40ed37fd597bedf4, 0x0, 0xc1013c4a58f8735c, 0x0, + 0x40f450abd926b811, 0x0, 0xc0c90f706042ba00, 0x0, 0xc0f80c81aba217f7, + 0x0, 0x40dd8b308abdb778, 0x0, 0xc0e56c08387cc770, 0x0, + 0x40d3ccfebfa65330); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE7(void) { + VSET(16, e16, m2); + double dscalar_16; + // -8.76965809, 55.45920181, 71.29286957, -84.65414429, + // -81.93881226, 75.13192749, -75.44019318, -48.81898499, + // 0.10306206, -25.18898392, 49.68006516, 72.66278076, + // -24.90880966, -32.59431458, 14.58876038, -55.07221603 + VLOAD_32(v4, 0xc10c5085, 0x425dd639, 0x428e95f3, 0xc2a94eec, 0xc2a3e0ac, + 0x4296438c, 0xc296e161, 0xc24346a4, 0x3dd31233, 0xc1c9830a, + 0x4246b863, 0x42915358, 0xc1c7453e, 0xc2026094, 0x41696b90, + 0xc25c49f3); + // 34.7812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5059); + asm volatile("vfwsub.wf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // -43.55090714, 20.67795181, 36.51161957, -119.43539429, + // -116.72006226, 40.35067749, -110.22144318, -83.60023499, + // -34.67818832, -59.97023392, 14.89881516, 37.88153076, + // -59.69005966, -67.37556458, -20.19248962, -89.85346985 + VCMP_U32(13, v8, 0xc22e3421, 0x41a56c72, 0x42120be6, 0xc2eedeec, 0xc2e970ac, + 0x42216718, 0xc2dc7161, 0xc2a73352, 0xc20ab677, 0xc26fe185, + 0x416e618c, 0x421786b0, 0xc26ec29f, 0xc286c04a, 0xc1a18a38, + 0xc2b3b4fa); + + VSET(16, e32, m4); + double dscalar_32; + // 
322189.5706008458510041, 914899.9451866354793310, + // -620811.0881863175891340, -456926.2657179111847654, + // -549945.8717311944346875, -386814.9759888321859762, + // 748677.5319772073999047, 821298.7777016961481422, + // 968861.0598710167687386, -343694.5546012039994821, + // -782815.4022130169905722, -561429.7869165195152164, + // 755371.9691831718664616, -954868.1761190977413207, + // -606267.0986005428712815, 818185.4808380266185850 + VLOAD_64(v8, 0x4113aa36484b9690, 0x412beba7e3ef80b0, 0xc122f2162d26c1cc, + 0xc11be37910185b2a, 0xc120c873be538d16, 0xc1179bfbe7699dce, + 0x4126d90b105f5108, 0x412910658e2eeaae, 0x412d913a1ea769f6, + 0xc114fa3a37e960c6, 0xc127e3becdeedd54, 0xc121222b92e6b8d8, + 0x41270d57f038c6d6, 0xc12d23e85a2c484a, 0xc1228076327bc536, + 0x4128f812f63066de); + // -83388.08593750 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7a2de0b); + asm volatile("vfwsub.wf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 405577.6565383458510041, 998288.0311241354793310, + // -537423.0022488175891340, -373538.1797804111847654, + // -466557.7857936944346875, -303426.8900513321859762, + // 832065.6179147073999047, 904686.8636391961481422, + // 1052249.1458085167687386, -260306.4686637039994821, + // -699427.3162755169905722, -478041.7009790195152164, + // 838760.0551206718664616, -871480.0901815977413207, + // -522879.0126630428712815, 901573.5667755266185850 + VCMP_U64(14, v16, 0x4118c126a04b9690, 0x412e77200fef80b0, 0xc120669e0126c1cc, + 0xc116cc88b8185b2a, 0xc11c79f724a71a2c, 0xc112850b8f699dce, + 0x412964833c5f5108, 0x412b9bddba2eeaae, 0x41300e592553b4fb, + 0xc10fc693bfd2c18c, 0xc1255846a1eedd54, 0xc11d2d66cdcd71b0, + 0x412998d01c38c6d6, 0xc12a98702e2c484a, 0xc11fe9fc0cf78a6c, + 0x412b838b223066de); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE8(void) { + VSET(16, e16, m2); + double dscalar_16; + // -8.76965809, 55.45920181, 71.29286957, -84.65414429, + // -81.93881226, 75.13192749, -75.44019318, -48.81898499, + // 0.10306206, 
-25.18898392, 49.68006516, 72.66278076, + // -24.90880966, -32.59431458, 14.58876038, -55.07221603 + VLOAD_32(v4, 0xc10c5085, 0x425dd639, 0x428e95f3, 0xc2a94eec, 0xc2a3e0ac, + 0x4296438c, 0xc296e161, 0xc24346a4, 0x3dd31233, 0xc1c9830a, + 0x4246b863, 0x42915358, 0xc1c7453e, 0xc2026094, 0x41696b90, + 0xc25c49f3); + // 34.7812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5059); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwsub.wf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 20.67795181, 0.00000000, -119.43539429, + // 0.00000000, 40.35067749, 0.00000000, -83.60023499, + // 0.00000000, -59.97023392, 0.00000000, 37.88153076, + // 0.00000000, -67.37556458, 0.00000000, -89.85346985 + VCMP_U32(15, v8, 0x0, 0x41a56c72, 0x0, 0xc2eedeec, 0x0, 0x42216718, 0x0, + 0xc2a73352, 0x0, 0xc26fe185, 0x0, 0x421786b0, 0x0, 0xc286c04a, 0x0, + 0xc2b3b4fa); + + VSET(16, e32, m4); + double dscalar_32; + // 322189.5706008458510041, 914899.9451866354793310, + // -620811.0881863175891340, -456926.2657179111847654, + // -549945.8717311944346875, -386814.9759888321859762, + // 748677.5319772073999047, 821298.7777016961481422, + // 968861.0598710167687386, -343694.5546012039994821, + // -782815.4022130169905722, -561429.7869165195152164, + // 755371.9691831718664616, -954868.1761190977413207, + // -606267.0986005428712815, 818185.4808380266185850 + VLOAD_64(v8, 0x4113aa36484b9690, 0x412beba7e3ef80b0, 0xc122f2162d26c1cc, + 0xc11be37910185b2a, 0xc120c873be538d16, 0xc1179bfbe7699dce, + 0x4126d90b105f5108, 0x412910658e2eeaae, 0x412d913a1ea769f6, + 0xc114fa3a37e960c6, 0xc127e3becdeedd54, 0xc121222b92e6b8d8, + 0x41270d57f038c6d6, 0xc12d23e85a2c484a, 0xc1228076327bc536, + 0x4128f812f63066de); + // -83388.08593750 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7a2de0b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwsub.wf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 998288.0311241354793310, + // 0.0000000000000000, 
-373538.1797804111847654, + // 0.0000000000000000, -303426.8900513321859762, + // 0.0000000000000000, 904686.8636391961481422, + // 0.0000000000000000, -260306.4686637039994821, + // 0.0000000000000000, -478041.7009790195152164, + // 0.0000000000000000, -871480.0901815977413207, + // 0.0000000000000000, 901573.5667755266185850 + VCMP_U64(16, v16, 0x0, 0x412e77200fef80b0, 0x0, 0xc116cc88b8185b2a, 0x0, + 0xc112850b8f699dce, 0x0, 0x412b9bddba2eeaae, 0x0, 0xc10fc693bfd2c18c, + 0x0, 0xc11d2d66cdcd71b0, 0x0, 0xc12a98702e2c484a, 0x0, + 0x412b838b223066de); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c new file mode 100644 index 000000000..8aae09e69 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c @@ -0,0 +1,31 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(8, e8, m1); + __asm__ volatile("vid.v v1"); + VCMP_U8(1, v1, 0, 1, 2, 3, 4, 5, 6, 7); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VLOAD_8(v0, 85, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("vid.v v1, v0.t"); + VCMP_U8(2, v1, 0, 0, 2, 0, 4, 0, 6, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c new file mode 100644 index 000000000..9a1ab49d6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c @@ -0,0 +1,37 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(1, e8, m1); + VLOAD_8(v1, 0b10001001); + VSET(8, e8, m1); + asm volatile("viota.m v2, v1"); + VCMP_U8(1, v2, 0, 1, 1, 1, 2, 2, 2, 2); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VCLEAR(v2); + VLOAD_8(v2, 0, 1, 2, 3, 4, 5, 6, 7); + VSET(1, e8, m1); + VLOAD_8(v1, 0b10001001); + VLOAD_8(v0, 0b11000111); + VSET(8, e8, m1); + asm volatile("viota.m v2, v1, v0.t"); + VCMP_U8(2, v2, 0, 1, 1, 3, 4, 5, 1, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c new file mode 100644 index 000000000..887110b14 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +static volatile uint32_t ALIGNED_I32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +// Misaligned access wrt 128-bit +void TEST_CASE1(void) { + VSET(15, e8, m1); + asm volatile("vle8.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE2(void) { + VSET(15, e16, m2); + asm volatile("vle16.v v2, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(2, v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +void TEST_CASE3(void) { + VSET(15, e32, m4); + asm volatile("vle32.v v4, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(3, v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 
0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +void TEST_CASE4(void) { + VSET(15, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(4, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c new file mode 100644 index 000000000..52db36c24 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c @@ -0,0 +1,439 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +uint64_t counter; + +// Vectors are statically allocated not to exceed the stack and go in the UART +// address space + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint16_t gold_vec_16b[VLEN / 2]; +uint32_t gold_vec_32b[VLEN / 4]; +uint64_t gold_vec_64b[VLEN / 8]; + +uint8_t zero_vec_8b[VLEN]; +uint16_t zero_vec_16b[VLEN / 2]; +uint32_t zero_vec_32b[VLEN / 4]; +uint64_t zero_vec_64b[VLEN / 8]; + +uint8_t buf_vec_8b[VLEN]; +uint16_t buf_vec_16b[VLEN / 2]; +uint32_t buf_vec_32b[VLEN / 4]; +uint64_t buf_vec_64b[VLEN / 8]; + +//////////// +// vl1reX // +//////////// + +// 1 whole register load +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e8, m1); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v17, buf_vec_8b); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, zero_vec_8b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // 
Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e16, m1); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 1, buf_vec_16b, gold_vec_16b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v17, buf_vec_16b); + VMCMP(uint16_t, % hu, 1, buf_vec_16b, zero_vec_16b, VLEN / 16); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 32); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 32); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 32); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 32, e32, m1); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 2, buf_vec_32b, gold_vec_32b, VLEN / 32); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v17, buf_vec_32b); + VMCMP(uint32_t, % u, 2, buf_vec_32b, zero_vec_32b, VLEN / 32); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 64); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 64); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 64); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + 
VSET(VLEN / 64, e64, m1); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 3, buf_vec_64b, gold_vec_64b, VLEN / 64); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v17, buf_vec_64b); + VMCMP(uint64_t, % lu, 3, buf_vec_64b, zero_vec_64b, VLEN / 64); +} + +//////////// +// vl2reX // +//////////// + +// 2 whole registers load +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e8, m2); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v18, buf_vec_8b); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, zero_vec_8b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e16, m2); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 5, buf_vec_16b,
gold_vec_16b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v18, buf_vec_16b); + VMCMP(uint16_t, % hu, 5, buf_vec_16b, zero_vec_16b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e32, m2); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 6, buf_vec_32b, gold_vec_32b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v18, buf_vec_32b); + VMCMP(uint32_t, % u, 6, buf_vec_32b, zero_vec_32b, VLEN / 16); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 32); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 32); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 32); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 32, e64, m2); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 7, buf_vec_64b, gold_vec_64b, VLEN / 32); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v18, buf_vec_64b); + VMCMP(uint64_t, % lu, 7, buf_vec_64b, zero_vec_64b, VLEN / 32); +} + +//////////// +// vl4reX // +//////////// + +// 4
whole registers load +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 2, e8, m4); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 8, buf_vec_8b, gold_vec_8b, VLEN / 2); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v20, buf_vec_8b); + VMCMP(uint8_t, % hhu, 8, buf_vec_8b, zero_vec_8b, VLEN / 2); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e16, m4); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 9, buf_vec_16b, gold_vec_16b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v20, buf_vec_16b); + VMCMP(uint16_t, % hu, 9, buf_vec_16b, zero_vec_16b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 8); + // Reserve space for a buffer in memory + 
INIT_MEM_ZEROES(buf_vec_32b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e32, m4); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 10, buf_vec_32b, gold_vec_32b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v20, buf_vec_32b); + VMCMP(uint32_t, % u, 10, buf_vec_32b, zero_vec_32b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e64, m4); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 11, buf_vec_64b, gold_vec_64b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v20, buf_vec_64b); + VMCMP(uint64_t, % lu, 11, buf_vec_64b, zero_vec_64b, VLEN / 16); +} + +//////////// +// vl8reX // +//////////// + +// 8 whole registers load +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern 
value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN, e8, m8); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 12, buf_vec_8b, gold_vec_8b, VLEN); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v24, buf_vec_8b); + VMCMP(uint8_t, % hhu, 12, buf_vec_8b, zero_vec_8b, VLEN); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 2, e16, m8); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 13, buf_vec_16b, gold_vec_16b, VLEN / 2); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v24, buf_vec_16b); + VMCMP(uint16_t, % hu, 13, buf_vec_16b, zero_vec_16b, VLEN / 2); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e32, m8); + // Check 
that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 14, buf_vec_32b, gold_vec_32b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v24, buf_vec_32b); + VMCMP(uint32_t, % u, 14, buf_vec_32b, zero_vec_32b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e64, m8); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 15, buf_vec_64b, gold_vec_64b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v24, buf_vec_64b); + VMCMP(uint64_t, % lu, 15, buf_vec_64b, zero_vec_64b, VLEN / 8); +} + +//////////// +// Others // +//////////// + +// Check with initial vl == 0 +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e8, m1); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 16, buf_vec_8b, gold_vec_8b, VLEN /
8); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v17, buf_vec_8b); + VMCMP(uint8_t, % hhu, 16, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c new file mode 100644 index 000000000..7260e19f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +// or add inp here +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile int8_t INP1[] = {0xff, 0x00, 0x0f, 0xf0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle8.v v1, (%0)" ::"r"(INP1)); + // VEC_CMP_8(1,v1,0xff, 0x00, 0x0f,0xf0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE2(void) { + VSET(4, e16, m1); + volatile int16_t INP1[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle16.v v1, (%0)" ::"r"(INP1)); + // VEC_CMP_16(2,v1,0xffff, 0x0000, 0x0f0f,0xf0f0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile int32_t INP3[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, + 0xf0f0f0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle32.v v1, (%0)" ::"r"(INP3)); + // VEC_CMP_32(3,v1,0xffffffff, 0x00000000, 0x0f0f0f0f,0xf0f0f0f0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE4(void) { + VSET(4, e64, m1); + volatile int64_t INP1[] = {0xffffffffffffffff, 0x0000000000000000, + 0x0f0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle64.v v1, (%0)" ::"r"(INP1)); + // 
VEC_CMP_64(4,v1,0xffffffffffffffff, 0x00000000000000000, + // 0x0f0f0f0f0f0f0f0f,0xf0f0f0f0f0f0f0f0); + // __asm__ volatile ("fence"); +} + +/* void TEST_CASE2(void) { */ +/* VSET(4,e8,m1); */ +/* volatile int8_t INP2[] = {0xff, 0x00, 0x0f, 0xf0}; */ +/* __asm__ volatile ("fence"); */ +/* VLOAD_8(v0,0x1,0x0,0x1,0x0); */ +/* VCLEAR_U8(v1); */ +/* __asm__ volatile ("vle8.v v1, (%0), v0.t"::"r" (INP2)); */ +/* VEC_CMP_8(2,v1,0xff, 0x00, 0x0f,0x00); */ +/* } */ + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c new file mode 100644 index 000000000..1a3daeab8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c @@ -0,0 +1,45 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +// All the accesses are misaligned wrt AXI DATA WIDTH + +void TEST_CASE1(void) { + VSET(9, e8, m1); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v1, 0xd3, 0x40); + + VSET(9, e64, m2); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(2, v1, 0xd3, 0x40); + + VSET(16, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(3, v1, 0xd3, 0x40); + + VSET(3, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + // The vector used by VCMP_U8 is actually 16 elements long + // Don't store more if you don't want to overflow + VSET(16, e64, m8); + VCMP_U8(4, v1, 0xd3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + 
+ EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c new file mode 100644 index 000000000..893d0e379 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c @@ -0,0 +1,293 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 
216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +//**********Checking functionality of vle16******// +void TEST_CASE1(void) { + VSET(15, e16, m2); + asm volatile("vle16.v v0, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(1, v0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e32, m4); + asm volatile("vle16.v v1, (%0)" ::"r"(&ALIGNED_I16[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle16 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e16, m2); + VCLEAR(v6); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(3, v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(4, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e16, m2); 
+ VCLEAR(v6); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(5, v6, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 0x8759, 15, 0x1989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(6, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(7, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(8, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle16.v 
v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(9, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 16.If LMUL is changed to +// mf8 it will give error because emul become less than 1/8 (EMUL = 1/16) +// But it does not support this configuration because SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(2, e32, mf2); + asm volatile("vle16.v v5, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(10, v5, 0xbbd3, 0x3840); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 it will give error because emul become greater than +// 8 (EMUL = 16) + +void TEST_CASE11(void) { + VSET(16, e8, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(11, v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e8, m2); + VCMP_U16(12, v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e16, m1); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v6, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m1); + VCMP_U16(13, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE14(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16); + VSET(13, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(14, v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e16, m1); + VLOAD_16(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle16.v v7, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m1); + VCMP_U16(15, v7, 1, 2, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, 0x8188, + 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + VSET(1024, e16, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(16, v8, LONG_I16); +} + +void TEST_CASE17(void) { + VSET(512, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(17, v10, LONG_I16); +} + +void TEST_CASE18(void) { + VSET(300, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(18, v12, LONG_I16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle16.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c new file mode 100644 index 000000000..0e4f1c1c5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c @@ -0,0 +1,307 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero 
vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint32_t ALIGNED_I32[16] + 
__attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +//**********Checking functionality of vle32********// +void TEST_CASE1(void) { + VSET(15, e32, m4); + asm volatile("vle32.v v0, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(1, v0, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e64, m4); + asm volatile("vle32.v v1, (%0)" ::"r"(&ALIGNED_I32[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle32 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e32, m4); + VCLEAR(v12); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(3, v12, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); +} + +void TEST_CASE4(void) { + VSET(16, e32, m4); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(4, v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e32, m4); + VCLEAR(v12); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle32.v v12, (%0), v0.t" 
::"r"(&ALIGNED_I32[0])); + VCMP_U32(5, v12, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 0x31897598, 15, 0x89139848); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(6, v12, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(7, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(8, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" 
::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(9, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 32.If LMUL is changed to +// mf8 and SEW is changed to e64 it will give error because emul become less +// than 1/8 (EMUL = 1/16) But it does not support this configuration because +// SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(1, e32, mf2); + asm volatile("vle32.v v5, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(10, v5, 0xf9aa71f0); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 or m4 it will give error because emul become greater +// than +// 8 +// (EMUL = 16) + +void TEST_CASE11(void) { + VSET(8, e8, m2); + asm volatile("vle32.v v8, (%0)" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(11, v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e32, m4); + asm volatile("vle32.v v24, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(12, v24, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e32, m1); + VLOAD_32(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v6, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m1); + VCMP_U32(13, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void 
TEST_CASE14(void) { + VSET(16, e16, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e16, m4); + asm volatile("vle32.v v24, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e16, m4); + VCMP_U32(14, v24, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e32, m1); + VLOAD_32(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle32.v v7, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m1); + VCMP_U32(15, v7, 1, 2, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// + +void TEST_CASE16(void) { + VSET(1024, e32, m8); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(16, v8, LONG_I32); +} + +void TEST_CASE17(void) { + VSET(512, e32, m4); + asm volatile("vle32.v v12, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(17, v12, LONG_I32); +} + +void TEST_CASE18(void) { + VSET(256, e32, m2); + asm volatile("vle32.v v14, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(18, v14, LONG_I32); +} + +void TEST_CASE19(void) { + VSET(200, e32, m2); + asm volatile("vle32.v v16, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(19, v16, LONG_I32); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle32.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); +
TEST_CASE19(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c new file mode 100644 index 000000000..282fd11b4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c @@ -0,0 +1,315 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm 
volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +//**********Checking functionality of vle64********// +void TEST_CASE1(void) { + VSET(15, e64, m8); + asm volatile("vle64.v v0, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(1, v0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e64, m2); + asm volatile("vle64.v v1, (%0)" ::"r"(&ALIGNED_I64[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle64 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(3, v24, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 
0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(4, v24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE5(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(5, v24, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 0x3189759837598759, 15, 0x8913984898951989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(6, v8, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(7, v8, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 
0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v4, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(8, v4, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle64.v v4, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(9, v4, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover upper bound of EMUL(8). 
If LMUL is changed to +// m2 it will give an error because EMUL becomes greater than 8 (EMUL = 16) +void TEST_CASE10(void) { + VSET(15, e8, m1); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(10, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(11, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v6, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(12, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE13(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(13, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 
0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 14, 15, 16); +} + +//******Checking functionality with a non-zero vstart value*****// +void TEST_CASE14(void) { + VSET(16, e64, m1); + VLOAD_64(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle64.v v7, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(14, v7, 1, 2, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//****Checking functionality with different values of EMUL and +// a large number of elements *******// +void TEST_CASE15(void) { + VSET(512, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(15, v8, LONG_I64); +} + +void TEST_CASE16(void) { + VSET(256, e64, m4); + asm volatile("vle64.v v12, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(16, v12, LONG_I64); +} + +void TEST_CASE17(void) { + VSET(128, e64, m2); + asm volatile("vle64.v v10, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(17, v10, LONG_I64); +} + +void TEST_CASE18(void) { + VSET(100, e64, m2); + asm volatile("vle64.v v14, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(18, v14, LONG_I64); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle64.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c new file mode 100644 index 000000000..b4e1d84ee --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c @@ -0,0 +1,273 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, 
mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +//**********Checking functionality of vle8 ********// +void TEST_CASE1(void) { + VSET(15, e8, m1); + asm volatile("vle8.v v0, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e16, m4); + asm volatile("vle8.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle8 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e8, m1); + VCLEAR(v3); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(3, v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(4, v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VCLEAR(v3); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(5, v3, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, + 0x59, 15, 0x89); +} + +//******Checking functionality 
with different combinations of vta and vma*****// +// **** Masked loads with vl=12: the expected values assume masked-off and tail elements are left undisturbed, even when ta/ma is selected ****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(6, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(7, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(8, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(9, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +//*******Checking functionality when the encoded EEW is not supported for the given SEW +// and LMUL values because EMUL becomes out of range*****// +// This test case exercises the lower-bound case of EMUL (1/8). 
If LMUL is changed to +// mf4 or mf8 it will give error because emul become out of range +void TEST_CASE10(void) { + VSET(2, e32, mf2); + asm volatile("vle8.v v5, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(10, v5, 0xd3, 0x40); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e8, m1); // Setting vl=16 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(11, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e8, m1, ta, ma" + : [A] "=r"(avl)); // Setting vl=0 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(12, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE13(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e8, m1); // Setting vl =13 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(13, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + uint64_t vstart; + VSET(16, e8, m1); + VLOAD_8(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle8.v v7, (%0)" ::"r"(&ALIGNED_I8[0])); + write_csr(vstart, 0); + VSET(16, e8, m1); + VCMP_U8(14, v7, 1, 2, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE15(void) { + VSET(1024, e8, m2); + asm volatile("vle8.v v8, 
(%0)" ::"r"(&LONG_I8[0])); + LVCMP_U8(15, v8, LONG_I8); +} + +void TEST_CASE16(void) { + VSET(800, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + LVCMP_U8(16, v8, LONG_I8); +} +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle8.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c new file mode 100644 index 000000000..b837f525d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c @@ -0,0 +1,91 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + __asm__ volatile("vle8ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +void TEST_CASE2(void) { + VSET(4, e8, m1); + volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + VLOAD_8(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP)); + VEC_CMP_8(2, v1, 0xff, 0x00, 0x0f, 0x00); +} + +void TEST_CASE3(void) { + VSET(3, e16, m1); + volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; + __asm__ volatile("vle16ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_16(3, v1, 0xffff, 0x0000, 0x0f0f); +} + +void TEST_CASE4(void) { + VSET(3, e16, m1); + volatile int16_t INP[] = {0xffff, 0x0001, 0x0f0f, 0xf0f0}; + VLOAD_16(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP)); + VEC_CMP_16(4, v1, 0xffff, 0x0000, 
0x0f0f); +} + +void TEST_CASE5(void) { + VSET(4, e32, m1); + volatile int32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0}; + __asm__ volatile("vle32ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); +} + +void TEST_CASE6(void) { + VSET(4, e32, m1); + volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0}; + VLOAD_32(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP)); + VEC_CMP_32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0); +} + +void TEST_CASE7(void) { + VSET(4, e64, m1); + volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, + 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + __asm__ volatile("vle64ff.v v1,(%0)" ::"r"(INP)); + VEC_CMP_64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, + 0xdeadbeeff0f0f0f0); +} + +void TEST_CASE8(void) { + VSET(4, e64, m1); + volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, + 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + VLOAD_64(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP)); + VEC_CMP_64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f, + 0x0000000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c new file mode 100644 index 000000000..65d71ab04 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c @@ -0,0 +1,190 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Positive-stride tests +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + asm volatile("vlse8.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U8(1, v1, 0x9f, 0x20, 0x05, 0xaa); +} + +void TEST_CASE2(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = 4; + asm volatile("vlse16.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U16(2, v1, 0x9fe4, 0x8f2e, 0xf9aa, 0xc394); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + asm volatile("vlse32.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U32(3, v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348); +} + +void TEST_CASE4(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1}; + uint64_t stride = 16; + VCLEAR(v1); + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(4, v1, 0x9fe419208f2e05e0, 0xa11a9384a7163840); +} + +// Zero-stride tests +// The implementation must perform all the memory accesses +void TEST_CASE5(void) { + VSET(16, e8, m1); + volatile uint8_t INP1[] = {0x9f}; + uint64_t stride = 0; + asm volatile("vlse8.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U8(5, v1, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, + 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f); +} + +// The implementation can also perform fewer accesses +void TEST_CASE6(void) { + VSET(16, e8, m1); + volatile uint8_t INP1[] = {0x9f}; + asm volatile("vlse8.v v1, (%0), x0" ::"r"(INP1)); + VCMP_U8(6, v1, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 
0x9f, 0x9f, 0x9f, 0x9f, + 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f); +} + +// Different LMUL +void TEST_CASE7(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = {0x9fa831c7a11a9384}; + asm volatile("vlse64.v v4, (%0), x0" ::"r"(INP1)); + VCMP_U64(7, v4, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384); +} + +// Others +// Negative-stride test +void TEST_CASE8(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = -4; + asm volatile("vlse16.v v1, (%0), %1" ::"r"(&INP1[7]), "r"(stride)); + VCMP_U16(8, v1, 0xbbd3, 0x71f0, 0x05e0, 0x1920); +} + +// Stride greater than default Ara AXI width == 128-bit (4 lanes) +void TEST_CASE9(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x01015ac1309bb678}; + uint64_t stride = 40; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(9, v1, 0x99991348a9f38cd1, 0x01015ac1309bb678); +} + +// Fill Ara internal Load Buffer +void TEST_CASE10(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + asm volatile("vlse64.v v4, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(10, v4, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0x9fa831c7a11a9384, + 0x1893179501093489, 0x1874754791888188, 0x9013930148815808, + 0x9031850931584902, 0x8319599991911111); +} + +// Masked stride loads +void TEST_CASE11(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 
0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse8.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U8(11, v1, 0x00, 0x20, 0x00, 0xaa); +} + +void TEST_CASE12(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = 4; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse16.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U16(12, v1, 0, 0x8f2e, 0, 0xc394); +} + +void TEST_CASE13(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse32.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U32(13, v1, 0, 0xf9aa71f0, 0, 0x99991348); +} + +void TEST_CASE14(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v4); + asm volatile("vlse64.v v4, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(14, v4, 0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0, 0x8319599991911111); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c new file mode 100644 index 000000000..fae863c9b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c @@ -0,0 +1,167 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +static volatile uint32_t ALIGNED_I32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +// EEW destination == EEW indexes +void TEST_CASE1(void) { + VSET(2, e8, m1); + VLOAD_8(v2, 1, 15); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(1, v1, 0xd3, 0x89); + + VSET(2, e16, m1); + VLOAD_16(v2, 2, 30); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(2, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_32(v2, 4, 60); + asm volatile("vluxei32.v v1, (%0), v2" 
::"r"(&ALIGNED_I32[0])); + VCMP_U32(3, v1, 0xf9aa71f0, 0x89139848); + + VSET(2, e64, m1); + VLOAD_64(v2, 8, 120); + VCLEAR(v1); + asm volatile("vluxei64.v v1, (%0), v2" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(4, v1, 0xf9aa71f0c394bbd3, 0x8913984898951989); +} + +// EEW Destination > EEW indexes +void TEST_CASE2(void) { + VSET(2, e16, m1); + VLOAD_8(v2, 2, 30); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(5, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_16(v2, 4, 60); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(6, v1, 0xf9aa71f0, 0x89139848); + + VSET(2, e64, m1); + VLOAD_32(v2, 8, 120); + asm volatile("vluxei32.v v1, (%0), v2" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(7, v1, 0xf9aa71f0c394bbd3, 0x8913984898951989); +} + +// EEW Destination < EEW indexes +void TEST_CASE3(void) { + VSET(2, e8, m1); + VLOAD_16(v2, 1, 15); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(8, v1, 0xd3, 0x89); + + VSET(2, e16, m1); + VLOAD_32(v2, 2, 30); + asm volatile("vluxei32.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(9, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_64(v2, 4, 60); + asm volatile("vluxei64.v v1, (%0), v2" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(10, v1, 0xf9aa71f0, 0x89139848); +} + +// Naive, masked +void TEST_CASE4(void) { + VSET(2, e8, m1); + VLOAD_8(v1, 99, 99); + VLOAD_8(v2, 1, 15); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei8.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(11, v1, 99, 0x89); + + VSET(2, e16, m1); + VLOAD_16(v1, 999, 999); + VLOAD_16(v2, 2, 30); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei16.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(12, v1, 999, 0x1989); + + VSET(2, e32, m1); + VLOAD_32(v1, 999, 999); + VLOAD_32(v2, 4, 60); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei32.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(13, v1, 999, 0x89139848); + + VSET(2, e64, m1); + VLOAD_64(v1, 999, 999); + VLOAD_64(v2, 8, 120); + 
VLOAD_8(v0, 0xAA); + asm volatile("vluxei64.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(14, v1, 999, 0x8913984898951989); +} + +// EEW destination == EEW indexes, many elements +void TEST_CASE5(void) { + VSET(12, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(15, v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, + 0x59, 0x89); + + VSET(12, e16, m2); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm volatile("vluxei16.v v2, (%0), v4" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(16, v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, + 0x11ae, 0x4891, 0x4902, 0x8759, 0x1989); + + VSET(12, e32, m4); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vluxei32.v v4, (%0), v8" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(17, v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + + VSET(12, e64, m8); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vluxei64.v v8, (%0), v16" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(18, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c new file mode 100644 index 000000000..a56f5b1e3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vlxei8.v, vl=4: v2 holds byte offsets 0..3 into INP, so v1 must equal INP. NOTE(review): vlxei<EEW>.v looks like the pre-1.0 RVV indexed-load spelling (1.0 has vluxei/vloxei) - confirm the toolchain accepts it. */ + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + MEMBARRIER; + __asm__ volatile("vlxei8.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_8(v2,0,1,2,3); +// volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; +// VLOAD_8(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei8.v v1, (%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_8(2,v1,0xff, 0x00, 0x0f,0x00); +// } + +void TEST_CASE3(void) { /* Unmasked vlxei16.v, vl=3: byte offsets 0, 2, 4 select the first three halfwords of INP. */ + VSET(3, e16, m1); + VLOAD_U16(v2, 0, 2, 4); + volatile uint16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; + MEMBARRIER; + __asm__ volatile("vlxei16.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U16(3, v1, 0xffff, 0x0000, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(3,e16,m1); +// VLOAD_16(v2,0,2,4); +// volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; +// VLOAD_16(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei16.v v1, (%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_16(4,v1,0xffff, 0x0000, 0x0f0f); +// } + +void TEST_CASE5(void) { /* Unmasked vlxei32.v, vl=4: byte offsets 0, 4, 8, 12 select all four words of INP. */ + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0}; + MEMBARRIER; + __asm__ volatile("vlxei32.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_32(v2,0,4,8,12); +// volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0}; +// VLOAD_32(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile (" vlxei32.v v1, (%0), v2, v0.t \n" :: "r" (INP)); +// VEC_CMP_32(6,v1,0xffffffff, 0x0, 0x0f0f0f0f, 0x0); +// } + +void TEST_CASE7(void) { /* Unmasked vlxei64.v, vl=4: byte offsets 0, 8, 16, 24 select all four doublewords of INP. */ + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, 
+ 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + MEMBARRIER; + __asm__ volatile("vlxei64.v v1,(%0), v2" ::"r"(INP)); + VEC_CMP_U64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, + 0xdeadbeeff0f0f0f0); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_64(v2,0,8,16,24); +// volatile int64_t INP[] = +// {0xdeadbeefffffffff,0xdeadbeef00000000,0xdeadbeef0f0f0f0f,0xdeadbeeff0f0f0f0}; +// VLOAD_64(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei64.v v1,(%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_64(8,v1,0xdeadbeefffffffff,0x0000000000000000,0xdeadbeef0f0f0f0f,0x0000000000000000); +// } + +int main(void) { /* Runs the unmasked cases only; the calls to the masked cases 2, 4, 6 and 8 are commented out. */ + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c new file mode 100644 index 000000000..18fb6661a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmacc.vv, vl=16, at e8/m1, e16/m2, e32/m4 and e64/m8: the product of the two source vectors is accumulated into the destination, truncated to SEW. */ + VSET(16, e8, m1); + VLOAD_8(v3, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v1, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + asm volatile("vmacc.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + asm volatile("vmacc.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v4, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + asm volatile("vmacc.vv 
v4, v8, v12"); + VCMP_U32(3, v4, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, + 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, + 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, + 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, + 0x27ae9e3365b265d2, 0xabfe86591f4ba5be, 0xd964de90eaae196e, + 0xfb655e2263986563); +} + +void TEST_CASE2(void) { /* Masked vmacc.vv with the same operands as TEST_CASE1: the 0xAA, 0xAA pattern in v0 enables the odd elements, and the even elements are expected to keep the old destination value. */ + VSET(16, e8, m1); + VLOAD_8(v3, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v1, 0x30, 0xef, 0xb4, 0x12, 
0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x30, 0x42, 0xb4, 0x96, 0x6d, 0xc7, 0x2c, 0xae, 0xf0, 0x29, + 0xd7, 0xc4, 0xc2, 0xe2, 0xe0, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xe3f0, 0x6dcb, 0x2fde, 0x6761, 0x910c, 0xf452, 0x82aa, + 0x86f2, 0x4631, 0x378a, 0x68c3, 0x561a, 0x3b5c, 0x5e42, 0x2db1, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v4, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xa055bbb6, 0xf2cbc4a8, 0x0be640c9, 0xf69b4268, 0xca121638, + 0xa233b25d, 0xe7c83142, 0xe2b1a595, 0x8eb340e3, 0x95d6b28a, + 0xffef4a03, 0x6338471d, 0xd0922181, 0xabe00fef, 0xf86a7c06, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 
0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x32a4c1edbbfe5591, 0xad447a1b99a48a53, 0x3a29727ae38b9b92, + 0x813582af4d75d09e, 0xaab9d34e4aeaa57a, 0x36d90fe4df2f2b2c, + 0xc4bd99b066821092, 0x955c2ac405b724e1, 0xa8b041a876aabcae, + 0x10277404741c4ca8, 0x8bdf55954f50101d, 0x58439c4175d7f582, + 0x0c8ca4d0a6d1a982, 0xabfe86591f4ba5be, 0x3f2cd5d2e2f5b538, + 0xfb655e2263986563); +} + +void TEST_CASE3(void) { /* Unmasked vmacc.vx, vl=16, at e8 to e64: the multiplier is the scalar register operand, truncated to SEW, instead of a vector. */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v1, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + asm volatile("vmacc.vx v1, %[A], v2" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, + 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 
0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + asm volatile("vmacc.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, + 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 
0x093861b79ac45352, 0xfd3c909decf66b5b, 0x04eb13132ce4267b, + 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, + 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, + 0x8867d35049c7b01d, 0x6d71fe0f35a1feea, 0xace16ac43ec0279f, + 0x82faf4a574c9dc1d, 0xa875c9d17e310a96, 0x1f75616001b61192, + 0x16ce205f44fb8635); +} + +void TEST_CASE4(void) { /* Masked vmacc.vx with the same operands as TEST_CASE3: v0 = 0xAA, 0xAA enables the odd elements, and the even elements are expected to keep the old destination value. */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v1, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v1, %[A], v2, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xfb, 0x8a, 0xc0, 0xc9, 0xa7, 0x8f, 0xc8, 0x0b, 0x57, 0x06, + 0x51, 0xaf, 0xd2, 0x28, 0x83, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0x0a9f, 0xb5af, 0x494e, 0x342e, 0x394c, 0x4cb6, 0xc117, + 0x8131, 0xb1af, 0x9c21, 0x22ab, 0x9759, 0xf1c9, 0x109f, 0x9bed, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 
0xb2436fad, 0xf29d4830, 0xd94eebe7, 0xb21bb5a3, 0xb80f178d, + 0x18eb9d88, 0x7764b8a3, 0x69a6ceb2, 0xb0dff3a6, 0x66c98b96, + 0xa98a861e, 0xef3fae1e, 0xde488617, 0x6b652c20, 0xc3ba8192, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x0dc8fa1b817237e5, 0xfd3c909decf66b5b, 0xb015bdbf0f39ec01, + 0xb258e6b065bbf956, 0x80c45834a5026c02, 0xdc0ae0e371686968, + 0x9d31b9b802ae2db1, 0x6c3cc52cd1fb49c2, 0x8732f75adf268ddb, + 0x8867d35049c7b01d, 0x6a259fe666091333, 0xace16ac43ec0279f, + 0x8a479b7e3558e399, 0xa875c9d17e310a96, 0xe2c7432cd7e3e81d, + 0x16ce205f44fb8635); +} + +int main(void) { /* Runs the four vmacc cases between INIT_CHECK and EXIT_CHECK after enabling the vector unit. */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c new file mode 100644 index 000000000..9f10378b8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c @@ -0,0 +1,224 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* vmadc.vvm at e8, e16, e32 and e64: the carry-out of v1 plus v2 plus the carry-in bit from v0 is written to v3 as a mask, which is then compared bytewise under an e8 vtype. */ + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v2, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80); + VLOAD_8(v0, 0xDD, 0xDD); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(2, e8, m1); + VCMP_U8(1, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v2, 4, 8, 12, 0x8000, 4, 8, 12, 0x8000); + VLOAD_16(v0, 0xDD); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(2, v3, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v2, 4, 8, 12, 0x80000000); + VLOAD_8(v0, 0x0D); + VCLEAR(v3); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(3, v3, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v2, 4, 8); + VLOAD_8(v0, 0x03); + VCLEAR(v3); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(4, v3, 0x02); +} + +void TEST_CASE2(void) { /* vmadc.vv: same operands as TEST_CASE1 but without carry-in; the expected carry-out masks are identical. */ + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v2, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(2, e8, m1); + VCMP_U8(5, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v2, 4, 8, 12, 0x8000, 4, 8, 12, 0x8000); + VCLEAR(v3); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(6, v3, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v2, 4, 8, 12, 0x80000000); + VCLEAR(v3); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(7, v3, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v2, 4, 8); + VCLEAR(v3); + asm 
volatile("vmadc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(8, v3, 0x02); +} + +void TEST_CASE3(void) { /* vmadc.vxm: carry-out of v1 plus scalar plus the carry-in from v0. NOTE(review): v0 is filled with one 0/1 value per element here, whereas TEST_CASE1 packs the carry-in bits (0xDD), and the expected masks equal those of TEST_CASE4, which has no carry-in - confirm the intended carry-in pattern. */ + const uint64_t scalar = 0x8000000080008080; + + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v0, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(10, v2, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v0, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(11, v2, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v0, 1, 1); + VCLEAR(v2); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(12, v2, 0x02); +} + +void TEST_CASE4(void) { /* vmadc.vx: carry-out of v1 plus scalar without carry-in, at e8, e16, e32 and e64. */ + const uint64_t scalar = 0x8000000080008080; + + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v2, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(14, v2, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(15, v2, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(16, v2, 0x02); +} + +void TEST_CASE5(void) { /* vmadc.vim: carry-out of v1 plus the immediate 10 plus the carry-in from v0. NOTE(review): v0 is filled with one 0/1 value per element, and the expected masks equal those of TEST_CASE6, which has no carry-in - confirm the intended carry-in pattern. */ 
+ VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(2, e8, m1); + VCMP_U8(17, v2, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v0, 1, 1, 0, 1, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(18, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v0, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(19, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v0, 1, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(20, v2, 0x02); +} + +void TEST_CASE6(void) { /* vmadc.vi: carry-out of v1 plus the immediate 10 without carry-in, at e8, e16, e32 and e64. */ + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + asm volatile("vmadc.vi v3, v1, 10"); + VSET(2, e8, m1); + VCMP_U8(21, v3, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(22, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(23, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(24, v2, 0x02); +} + +int main(void) { /* Runs the six vmadc cases between INIT_CHECK and EXIT_CHECK after enabling the vector unit. */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c new file mode 100644 index 000000000..b657e3f59 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmadd.vv, vl=16, at e8/m1, e16/m2, e32/m4 and e64/m8: the destination is multiplied by the first source and the second source is added, truncated to SEW. */ + VSET(16, e8, m1); + VLOAD_8(v1, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v3, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + asm volatile("vmadd.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + asm volatile("vmadd.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v12, 0xa055bbb6, 0x71f9a668, 
0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + asm volatile("vmadd.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, + 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, + 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, + 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, + 0x27ae9e3365b265d2, 0xabfe86591f4ba5be, 0xd964de90eaae196e, + 0xfb655e2263986563); +} + +void TEST_CASE2(void) { /* Masked vmadd.vv with the same operands as TEST_CASE1: the 0xAA, 0xAA pattern in v0 enables the odd elements, and the even elements are expected to keep the old destination value. */ + VSET(16, e8, m1); + VLOAD_8(v1, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 
0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v3, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x21, 0x42, 0x7f, 0x96, 0x50, 0xc7, 0x3f, 0xae, 0x74, 0x29, + 0x29, 0xc4, 0x14, 0xe2, 0xb0, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0x1c20, 0x6dcb, 0xde38, 0x6761, 0x3eb5, 0xf452, 0x48e1, + 0x86f2, 0x3d2a, 0x378a, 0x3f07, 0x561a, 0x8812, 0x5e42, 0x56f4, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v12, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x0401c584, 0xf2cbc4a8, 0x4a71aa0c, 0xf69b4268, 0x273fcd5d, + 0xa233b25d, 0x599c994e, 0xe2b1a595, 
0x4710afae, 0x95d6b28a, + 0x96ee5026, 0x6338471d, 0xd95da451, 0xabe00fef, 0xbe990e4b, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x9cffef345b95f00b, 0xad447a1b99a48a53, 0xadfda1d2464c6433, + 0x813582af4d75d09e, 0x8a0c6e4bc950e81f, 0x36d90fe4df2f2b2c, + 0x27d7ec90ba159756, 0x955c2ac405b724e1, 0xbfd90c33e58a8fe3, + 0x10277404741c4ca8, 0x1a7b72dd8ac39fab, 0x58439c4175d7f582, + 0x6b01c18a3daf288b, 0xabfe86591f4ba5be, 0x0c059f365ec6d3d5, + 0xfb655e2263986563); +} + +void TEST_CASE3(void) { /* Unmasked vmadd.vx, vl=16, at e8 to e64: the destination is multiplied by the scalar register operand, truncated to SEW, and the source vector is added. */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + asm volatile("vmadd.vx v1, %[A], v2" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 
0x0b, 0x32, 0x06, + 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + asm volatile("vmadd.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, + 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 
0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x093861b79ac45352, 0xfd3c909decf66b5b, 0x04eb13132ce4267b, + 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, + 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, + 0x8867d35049c7b01d, 0x6d71fe0f35a1feea, 0xace16ac43ec0279f, + 0x82faf4a574c9dc1d, 0xa875c9d17e310a96, 0x1f75616001b61192, + 0x16ce205f44fb8635); +} + +void TEST_CASE4(void) { /* Masked vmadd.vx with the same operands as TEST_CASE3: v0 = 0xAA, 0xAA enables the odd elements, and the even elements are expected to keep the old destination value. */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v1, %[A], v2, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x60, 0x8a, 0xa0, 0xc9, 0x35, 0x8f, 0xa3, 0x0b, 0x5f, 0x06, + 0x07, 0xaf, 0xe7, 0x28, 0x53, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0x992e, 0xb5af, 0x90c3, 0x342e, 0xd53c, 0x4cb6, 0x2d2f, + 0x8131, 0x0a79, 0x9c21, 0x6f34, 0x9759, 0xc95a, 0x109f, 0x36bf, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 
0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0x709e784e, 0xf29d4830, 0xad5df7fd, 0xb21bb5a3, 0x0a0030d0, + 0x18eb9d88, 0x507fd5c7, 0x69a6ceb2, 0x0bf1c209, 0x66c98b96, + 0x842ba667, 0xef3fae1e, 0xd85d7415, 0x6b652c20, 0x153e7e16, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x2a47beb4fd7729c5, 0xfd3c909decf66b5b, 0xbbaf5fe50c41f22a, + 0xb258e6b065bbf956, 0x609cbc4a78316c29, 0xdc0ae0e371686968, + 0x97860fd5fba018c0, 0x6c3cc52cd1fb49c2, 0x866d16f96d3d8b67, + 0x8867d35049c7b01d, 0x6962bde49e3edf3f, 0xace16ac43ec0279f, + 0x64cf433b239e7764, 0xa875c9d17e310a96, 0x217e2df75fcf0d8d, + 0x16ce205f44fb8635); +} + +int main(void) { /* Runs the four vmadd cases between INIT_CHECK and EXIT_CHECK after enabling the vector unit. */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c new file mode 100644 index 000000000..9e280b9f2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and 
University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vmand.mm on mixed bit patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmand.mm v1, v2, v3");  // expect v2 & v3 per mask bit
+  VSET(2, e8, m1);  // presumably vl = 2: compare the two mask bytes
+  VCMP_U8(1, v1, 0x84, 0x21);
+}
+
+void TEST_CASE2(void) {  // AND with all-ones must return v2 unchanged
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0xCD, 0xEF);
+}
+
+void TEST_CASE3(void) {  // AND with all-zeros must clear the result
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0x00, 0x00);
+}
+
+void TEST_CASE4(void) {  // nibble masks: low nibble of byte 0, high of byte 1
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0x0D, 0xE0);
+}
+
+void TEST_CASE5(void) {  // shortened vl: bits past vl must keep old value
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);  // pre-fill destination to expose untouched bits
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);  // presumably vl = 13, so only bits 0-12 are written
+  asm volatile("vmand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0x84, 0xE1);  // 0xE1: 0x21 in bits 0-4, old 1s in bits 5-7
+}
+
+void TEST_CASE6(void) {  // NOTE(review): the vmand.mm result in v1 is unchecked
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21);
+  asm volatile("vmand.mm v1, v2, v3");
+  VSET(13, e8, m1);
+  VCLEAR(v2);  // v2 is a source operand here, cleared after the instruction
+  VCMP_U8(6, v2, 0, 0, 0, 0, 0, 0, 0, 0);  // checks v2, not v1 - confirm intent
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+  TEST_CASE6();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c
new file mode 100644
index 000000000..4952d9760
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c
@@ -0,0 +1,68 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vmandnot.mm on mixed bit patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmandnot.mm v1, v2, v3");  // expect v2 & ~v3 per mask bit
+  VSET(2, e8, m1);  // presumably vl = 2: compare the two mask bytes
+  VCMP_U8(1, v1, 0x49, 0xCE);
+}
+
+void TEST_CASE2(void) {  // v3 all-ones: ~v3 is zero, result must be cleared
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmandnot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0x00, 0x00);
+}
+
+void TEST_CASE3(void) {  // v3 all-zeros: result must equal v2
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmandnot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0xCD, 0xEF);
+}
+
+void TEST_CASE4(void) {  // nibble masks: high nibble of byte 0, low of byte 1
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmandnot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0xC0, 0x0F);
+}
+
+void TEST_CASE5(void) {  // shortened vl: bits past vl must keep old value
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);  // pre-fill destination to expose untouched bits
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);  // presumably vl = 13, so only bits 0-12 are written
+  asm volatile("vmandnot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0x49, 0xEE);  // 0xEE: 0xCE in bits 0-4, old 1s in bits 5-7
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c
new file mode 100644
index 000000000..6348fc5f1
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c
@@ -0,0 +1,181 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vmax.vv, unmasked, at e16, e32 and e64
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  asm volatile("vmax.vv v2, v4, v6");  // signed max, e.g. -80 vs 7000 -> 7000
+  VCMP_I16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmax.vv v4, v8, v12");
+  VCMP_I32(2, v4, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmax.vv v8, v16, v24");
+  VCMP_I64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+}
+
+void TEST_CASE2(void) {  // vmax.vv under v0.t: masked-off elements keep filler
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);  // mask: elements 2,3 of every group of 4 active
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmax.vv v2, v4, v6, v0.t");
+  VCMP_I16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901,
+           0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmax.vv v4, v8, v12, v0.t");
+  VCMP_I32(5, v4, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef,
+           2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef,
+           0xdeadbeef, 2560, 19901);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmax.vv v8, v16, v24, v0.t");
+  VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901);
+}
+
+void TEST_CASE3(void) {  // vmax.vx, unmasked, at e8, e16, e32 and e64
+  const uint64_t scalar = 40;  // x-register operand compared with each element
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8,
+          -25, 99);
+  asm volatile("vmax.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_I8(7, v1, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40,
+          99);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_I16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmax.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_I32(9, v4, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_I64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+}
+
+void TEST_CASE4(void) {  // vmax.vx under v0.t: masked-off elements keep filler
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8,
+          -25, 99);
+  VLOAD_8(v0, 0xCC, 0xCC);  // mask: elements 2,3 of every group of 4 active
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef, 0xef, 0xef, 0xef, 0xef);
+  asm volatile("vmax.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I8(11, v1, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99,
+          0xef, 0xef, 40, 99);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef,
+           0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmax.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I32(13, v4, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40,
+           199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40,
+           199);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           40, 199);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c
new file mode 100644
index 000000000..d71e3c8f1
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c
@@ -0,0 +1,181 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vmaxu.vv, unmasked, at e16, e32 and e64
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  asm volatile("vmaxu.vv v2, v4, v6");  // unsigned max of v4[i] and v6[i]
+  VCMP_U16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmaxu.vv v4, v8, v12");
+  VCMP_U32(2, v4, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmaxu.vv v8, v16, v24");
+  VCMP_U64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345,
+           7000, 2560, 19901, 12345, 7000, 2560, 19901);
+}
+
+void TEST_CASE2(void) {  // vmaxu.vv under v0.t: masked-off elements keep filler
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);  // mask: elements 2,3 of every group of 4 active
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmaxu.vv v2, v4, v6, v0.t");
+  VCMP_U16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901,
+           0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmaxu.vv v4, v8, v12, v0.t");
+  VCMP_U32(5, v4, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef,
+           2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef,
+           0xdeadbeef, 2560, 19901);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmaxu.vv v8, v16, v24, v0.t");
+  VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901);
+}
+
+void TEST_CASE3(void) {  // vmaxu.vx, unmasked, at e8, e16, e32 and e64
+  const uint64_t scalar = 40;  // x-register operand compared with each element
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25,
+          199);
+  asm volatile("vmaxu.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(7, v1, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40,
+          40, 199);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vmaxu.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(9, v4, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199,
+           12345, 40, 40, 199);
+}
+
+void TEST_CASE4(void) {  // vmaxu.vx under v0.t: masked-off elements keep filler
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25,
+          199);
+  VLOAD_8(v0, 0xCC, 0xCC);  // mask: elements 2,3 of every group of 4 active
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef, 0xef, 0xef, 0xef, 0xef);
+  asm volatile("vmaxu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(11, v1, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199,
+          0xef, 0xef, 40, 199);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef,
+           0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmaxu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(13, v4, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40,
+           199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40,
+           199);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           40, 199);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c
new file mode 100644
index 000000000..9e0eb91f7
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c
@@ -0,0 +1,113 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) {  // vmerge.vvm at e8, e16, e32 and e64
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+  VLOAD_8(v0, 0xAA, 0x55);  // odd elements of 0-7, even elements of 8-15
+  asm volatile("vmerge.vvm v3, v1, v2, v0");  // mask bit set ? v2[i] : v1[i]
+  VCMP_U8(1, v3, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vvm v6, v2, v4, v0");
+  VCMP_U16(2, v6, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vvm v12, v4, v8, v0");
+  VCMP_U32(3, v12, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vvm v24, v8, v16, v0");
+  VCMP_U64(4, v24, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8);
+}
+
+void TEST_CASE2(void) {  // vmerge.vxm: selected elements take the scalar
+  const uint64_t scalar = 0x00000000deadbeef;  // expected truncated per SEW
+
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vxm v3, v1, %[A], v0" ::[A] "r"(scalar));
+  VCMP_U8(5, v3, 1, 0xef, 3, 0xef, 5, 0xef, 7, 0xef, 0xef, 2, 0xef, 4, 0xef, 6,
+          0xef, 8);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vxm v4, v2, %[A], v0" ::[A] "r"(scalar));
+  VCMP_U16(6, v4, 1, 0xbeef, 3, 0xbeef, 5, 0xbeef, 7, 0xbeef, 0xbeef, 2, 0xbeef,
+           4, 0xbeef, 6, 0xbeef, 8);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vxm v8, v4, %[A], v0" ::[A] "r"(scalar));
+  VCMP_U32(7, v8, 1, 0xdeadbeef, 3, 0xdeadbeef, 5, 0xdeadbeef, 7, 0xdeadbeef,
+           0xdeadbeef, 2, 0xdeadbeef, 4, 0xdeadbeef, 6, 0xdeadbeef, 8);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vxm v16, v8, %[A], v0" ::[A] "r"(scalar));
+  VCMP_U64(8, v16, 1, 0x00000000deadbeef, 3, 0x00000000deadbeef, 5,
+           0x00000000deadbeef, 7, 0x00000000deadbeef, 0x00000000deadbeef, 2,
+           0x00000000deadbeef, 4, 0x00000000deadbeef, 6, 0x00000000deadbeef, 8);
+}
+
+void TEST_CASE3(void) {  // vmerge.vim: immediate -1 expected as all-ones
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vim v3, v1, -1, v0");
+  VCMP_U8(9, v3, 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 0xff, 2, 0xff, 4, 0xff, 6,
+          0xff, 8);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vim v4, v2, -1, v0");
+  VCMP_U16(10, v4, 1, 0xffff, 3, 0xffff, 5, 0xffff, 7, 0xffff, 0xffff, 2,
+           0xffff, 4, 0xffff, 6, 0xffff, 8);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vim v8, v4, -1, v0");
+  VCMP_U32(11, v8, 1, 0xffffffff, 3, 0xffffffff, 5, 0xffffffff, 7, 0xffffffff,
+           0xffffffff, 2, 0xffffffff, 4, 0xffffffff, 6, 0xffffffff, 8);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v0, 0xAA, 0x55);
+  asm volatile("vmerge.vim v16, v8, -1, v0");
+  VCMP_U64(12, v16, 1, 0xffffffffffffffff, 3, 0xffffffffffffffff, 5,
+           0xffffffffffffffff, 7, 0xffffffffffffffff, 0xffffffffffffffff, 2,
+           0xffffffffffffffff, 4, 0xffffffffffffffff, 6, 0xffffffffffffffff, 8);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+
+  EXIT_CHECK();
+}
diff --git
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c new file mode 100644 index 000000000..f99187197 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c @@ -0,0 +1,503 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfeq.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x0); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, 
-0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfeq.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x1); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfeq.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m4); + // 0.2434, 0.7285, 
0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v8, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v12, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v4, 0x0002); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x8000); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 
0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x800a); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfeq.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0020); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 
-0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfeq.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x7ffe); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, 0.4585094341291300, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfeq.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0x0008); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 0.5806, -0.2649, -0.4783, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 
0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0xaaa0); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0x0002); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0x0008); +}; + +// Check if only the correct destination bits are written +void TEST_CASE5(void) { + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 
0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e16, m1); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v2, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v3, 0x33ca, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfeq.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(13, v1, 0xffffffffffff0001); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e32, m1); + // -0.72077256, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v2, 0x70000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // 0.79994357, sNaN, -0.34645590, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v3, 0x80000000, 0xffffffff, 0xbeb162ab, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfeq.vv v1, v2, v3"); + 
VSET(1, e64, m1); + VCMP_U64(14, v1, 0xffffffffffff0004); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e64, m1); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v2, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbf3180f63f75db3c, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // 0.8643613633211786, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v3, 0x3feba8d9296c7e74, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfeq.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(15, v1, 0xffffffffffff0001); +}; + +// Write to v0 during a masked operation, WAR dependency should be respected +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, 0.7241, 0.0027, -0.7114, 0.8701, + // 
0.8701, -0.5786, -0.4229, 0.6968, 0.6968, 0.7217, -0.2842, + // 0.1659, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, 0x39c6, 0xb48c, 0x314f, 0x314f); + // 0.2434, 0.7285, -0.2678, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.2622, -0.5786, -0.4229, 0.5981, 0.5981, 0.7217, -0.2842, + // 0.1328, 0.1328 + VLOAD_16(v6, 0x33ca, 0x39d4, 0xb449, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, 0xb48c, 0x3040, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vv v0, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(16, v0, 0x2222); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, 0.09933749, 0.39402914, -0.81853813, + // 0.96037650, -0.81018746, -0.44735566, -0.25510681, + // -0.30920035, -0.31596854, 0.19188073, -0.29310879, + // 0.22002794, 0.48599416, -0.80913633, -0.30138883 + VLOAD_32(v12, 0x00000000, 0x3dcb7174, 0x3ec9be30, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, 0x3e614f01, 0x3ef8d43a, 0xbf4f238f, + 0xbe9a4fa3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(17, v0, 0x2222); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.8792039527057112, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 
0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfec227053ec5198, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, -0.8792039527057112, + // 0.9703747829163081, -0.1308855743137316, -0.3798019472030296, + // -0.8792039527057112, -0.1745056251010144, + // -0.3736408604742532, 0.4947226024634424, + // -0.9079294226891812, -0.9490909352855985, 0.6283940115157876, + // 0.1053912590957002, -0.5927175227484118, -0.3032110323317654 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbfec227053ec5198, + 0x9fee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0xbfec227053ec5198, 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfe2f78abcff0ede, + 0xbfd367cf3ee9af68); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v0, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(18, v0, 0x2222); +}; + +// Test sNaN/qNaN behaviour +void TEST_CASE7(void) { + CLEAR_FFLAGS; + // First, give only qNaN (no exception is generated) + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, qNaNh, qNaNh, 0x39cb, qNaNh, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, qNaNh, 0xb48c, qNaNh, qNaNh); + VLOAD_16(v6, 0x33ca, qNaNh, qNaNh, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, qNaNh, qNaNh, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vv v0, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(19, v0, 0x0330); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, qNaNf, qNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 
0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(20, v0, 0x0331); + + VSET(16, e64, m8); + VLOAD_64(v16, qNaNd, qNaNd, 0x3fed8915c5665532, 0xbfec227053ec5198, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0x3fc41b3c98507fe0, + 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfd367cf3ee9af68, 0x3feccb416af162fc, qNaNd, qNaNd, + 0xbfd2cb447b63f610, qNaNd); + VLOAD_64(v24, qNaNd, 0x3fdefda0947f3460, qNaNd, 0x9fee55c27d3d743e, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0xbfec227053ec5198, + 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, qNaNd, 0x3fbafaebeb19acf0, + qNaNd, qNaNd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v0, v16, v24"); + VSET(1, e16, m1); + VCMP_U16(21, v0, 0x0330); + CHECK_FFLAGS(0); + + // Give sNaN (Invalid operation) + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, sNaNf, sNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(22, v0, 0x0331); + CHECK_FFLAGS(NV); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + // TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c new file mode 100644 index 000000000..07b4943f2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c @@ -0,0 +1,134 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// vmfge.vf, unmasked: vector >= scalar compare at SEW = 16, 32 and 64 bits +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfge.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xf2b7); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfge.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x7ffe); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432,
0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfge.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4f7b); +}; + +// vmfge.vf under v0.t with v0 = 0xAAAA (odd-indexed elements active) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-5 (subnormal), -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649 + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfge.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0xaaaa); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfge.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1,
e16, m2); + VCMP_U16(5, v4, 0x000a); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfge.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x0a2a); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c new file mode 100644 index 000000000..2e401cd5a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c @@ -0,0 +1,134 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// vmfgt.vf, unmasked: vector > scalar compare at SEW = 16, 32 and 64 bits +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfgt.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xf297); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfgt.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0000); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984,
0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfgt.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4f73); +}; + +// vmfgt.vf under v0.t with v0 = 0xAAAA (odd-indexed elements active) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-5 (subnormal), -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649 + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfgt.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x000a); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfgt.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x0008); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + //
0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfgt.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x0a22); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c new file mode 100644 index 000000000..d8bbce14a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c @@ -0,0 +1,273 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfle.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x6325); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfle.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0665); + + VSET(16, e64, m8); + 
VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfle.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x31bc); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfle.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x2222); + + VSET(16, e32, m4); 
+ // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfle.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x8220); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + 
// 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfle.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0xa0aa); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfle.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0d68); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfle.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0xffff); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // 
-0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfle.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xb08c); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-05 (subnormal), -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfle.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0xaaa0); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); +
VCLEAR(v4); + asm volatile("vmfle.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa2); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfle.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0xa088); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c new file mode 100644 index 000000000..d168798ec --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c @@ -0,0 +1,279 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmflt.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x6325); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmflt.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0664); + + VSET(16, e64, m8); + // 
0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmflt.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x31b8); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 
0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmflt.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x2220); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmflt.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x0220); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 
0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmflt.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x20a0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmflt.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0d48); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 
0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmflt.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x8001); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmflt.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xb084); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-05 (subnormal), -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmflt.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0x0000); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387,
0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmflt.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa0); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmflt.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0xa080); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c new file mode 100644 index 000000000..19adf60ba --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c @@ -0,0 +1,503 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfne.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xffff); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfne.vv v4, v8, v12"); + 
VSET(1, e16, m2); + VCMP_U16(2, v4, 0xfffe); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfne.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0xfffb); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 
0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0xaaa8); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x2aaa); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 
0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x2aa0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfne.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0xffdf); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 
0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfne.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x8001); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfne.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xfff7); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-05 (subnormal), -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0x000a); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912,
-0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa8); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0xaaa2); +}; + +// Check if only the correct destination bits are written +void TEST_CASE5(void) { + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e16, m1); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 
0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v2, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v3, 0x33ca, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(13, v1, 0xfffffffffffffffe); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e32, m1); + // -0.72077256, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v2, 0x70000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // 0.79994357, sNaN, -0.34645590, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v3, 0x80000000, 0xffffffff, 0xbeb162ab, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(14, v1, 0xfffffffffffffffb); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, 
e64, m1); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v2, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbf3180f63f75db3c, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // 0.8643613633211786, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v3, 0x3feba8d9296c7e74, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(15, v1, 0xfffffffffffffffe); +}; + +// Write to v0 during a masked operation, WAR dependency should be respected +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, 0.7241, 0.0027, -0.7114, 0.8701, + // 0.8701, -0.5786, -0.4229, 0.6968, 0.6968, 0.7217, -0.2842, + // 0.1659, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, 0x39c6, 0xb48c, 0x314f, 0x314f); + // 0.2434, 0.7285, -0.2678, -0.2678, 0.0027, -0.7114, 
0.2622, + // 0.2622, -0.5786, -0.4229, 0.5981, 0.5981, 0.7217, -0.2842, + // 0.1328, 0.1328 + VLOAD_16(v6, 0x33ca, 0x39d4, 0xb449, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, 0xb48c, 0x3040, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v0, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(16, v0, 0x8888); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, 0.09933749, 0.39402914, -0.81853813, + // 0.96037650, -0.81018746, -0.44735566, -0.25510681, + // -0.30920035, -0.31596854, 0.19188073, -0.29310879, + // 0.22002794, 0.48599416, -0.80913633, -0.30138883 + VLOAD_32(v12, 0x00000000, 0x3dcb7174, 0x3ec9be30, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, 0x3e614f01, 0x3ef8d43a, 0xbf4f238f, + 0xbe9a4fa3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v0, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(17, v0, 0x8888); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.8792039527057112, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfec227053ec5198, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 
0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, -0.8792039527057112, + // 0.9703747829163081, -0.1308855743137316, -0.3798019472030296, + // -0.8792039527057112, -0.1745056251010144, + // -0.3736408604742532, 0.4947226024634424, + // -0.9079294226891812, -0.9490909352855985, 0.6283940115157876, + // 0.1053912590957002, -0.5927175227484118, -0.3032110323317654 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbfec227053ec5198, + 0x9fee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0xbfec227053ec5198, 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfe2f78abcff0ede, + 0xbfd367cf3ee9af68); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v0, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(18, v0, 0x8888); +}; + +// Test sNaN/qNaN behaviour +void TEST_CASE7(void) { + CLEAR_FFLAGS; + // First, give only qNaN (no exception is generated) + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, qNaNh, qNaNh, 0x39cb, qNaNh, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, qNaNh, 0xb48c, qNaNh, qNaNh); + VLOAD_16(v6, 0x33ca, qNaNh, qNaNh, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, qNaNh, qNaNh, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v0, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(19, v0, 0xfccf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, qNaNf, qNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v16, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm 
volatile("vmfne.vv v0, v8, v16"); + VSET(1, e16, m2); + VCMP_U16(20, v0, 0xfcce); + + VSET(16, e64, m8); + VLOAD_64(v16, qNaNd, qNaNd, 0x3fed8915c5665532, 0xbfec227053ec5198, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0x3fc41b3c98507fe0, + 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfd367cf3ee9af68, 0x3feccb416af162fc, qNaNd, qNaNd, + 0xbfd2cb447b63f610, qNaNd); + VLOAD_64(v24, qNaNd, 0x3fdefda0947f3460, qNaNd, 0x9fee55c27d3d743e, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0xbfec227053ec5198, + 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, qNaNd, 0x3fbafaebeb19acf0, + qNaNd, qNaNd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v0, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(21, v0, 0xfccf); + CHECK_FFLAGS(0); + + // Give sNaN (Invalid operation) + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, sNaNf, sNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(22, v0, 0xfcce); + CHECK_FFLAGS(NV); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + // TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c new file mode 100644 index 000000000..fe3e183c6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c @@ -0,0 +1,181 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + asm volatile("vmin.vv v2, v4, v6"); + VCMP_I16(1, v2, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + -19900, 50, -80, 400, -19900); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmin.vv v4, v8, v12"); + VCMP_I32(2, v4, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + -19900, 50, -80, 400, -19900); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmin.vv v8, v16, v24"); + VCMP_I64(3, v8, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400, + -19900, 50, -80, 400, -19900); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmin.vv v2, v4, v6, v0.t"); + VCMP_I16(4, v2, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900, + 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900); + + VSET(16, e32, 
m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmin.vv v4, v8, v12, v0.t"); + VCMP_I32(5, v4, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef, + 400, -19900, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, + 0xdeadbeef, 400, -19900); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmin.vv v8, v16, v24, v0.t"); + VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + asm volatile("vmin.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(7, v1, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, + 40); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 
12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(8, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + -25, 40); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmin.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(9, v4, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + -25, 40); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(10, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, + -25, 40); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef); + asm volatile("vmin.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(11, v1, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, + 0xef, 0xef, -25, 40); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(12, v2, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40, 0xbeef, + 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmin.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(13, v4, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, + 40, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25, + 40); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + -25, 40); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c new file mode 100644 index 000000000..5472f5cb0 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c @@ -0,0 +1,176 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + asm volatile("vminu.vv v2, v4, v6"); + VCMP_U16(1, v2, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + 50, 80, 400, 19900); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vminu.vv v4, v8, v12"); + VCMP_U32(2, v4, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + 50, 80, 400, 19900); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vminu.vv v8, v16, v24"); + VCMP_U64(3, v8, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900, + 50, 80, 400, 19900); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vminu.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900, + 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 
2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vminu.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef, + 400, 19900, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, + 0xdeadbeef, 400, 19900); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vminu.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + asm volatile("vminu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v1, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vminu.vx v2, v4, %[A]" ::[A] 
"r"(scalar)); + VCMP_U16(8, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vminu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v4, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(10, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef); + asm volatile("vminu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(11, v1, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, + 0xef, 0xef, 25, 40); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(12, v2, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40, 0xbeef, + 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vminu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(13, v4, 
0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, + 40, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, 40); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 25, 40); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c new file mode 100644 index 000000000..61db49ecc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x7B, 0xDE); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0x32, 0x10); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xFF, 0xFF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xF2, 0x1F); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x7B, 0xFE); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c new file mode 100644 index 000000000..15322ac72 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x32, 0x10); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0x00, 0x00); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0x32, 0x10); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0x30, 0x00); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x32, 0xF0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c new file mode 100644 index 000000000..8a1693082 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xCD, 0xEF); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0xFF, 0xFF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xCF, 0xFF); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0xCD, 0xEF); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c new file mode 100644 index 000000000..8e9497283 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xFF, 0xFF); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xFF, 0xFF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xFD, 0xEF); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0xFF, 0xFF); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c new file mode 100644 index 000000000..00a4e6a62 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c @@ -0,0 +1,160 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, + 0xef, 10, 0xff); + VLOAD_8(v2, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, + 12, 0x80); + VLOAD_8(v0, 0x99, 0x99); + asm volatile("vmsbc.vvm v3, v1, v2, v0"); + VSET(2, e8, m1); + VCMP_U8(1, v3, 0x44, 0x44); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xbeef, 10, 0xffff, 16, 0xbeef, 10, 0xffff); + VLOAD_16(v2, 4, 0xbeef, 12, 0x8000, 4, 0xbeef, 12, 0x8000); + VLOAD_8(v0, 0x99); + VCLEAR(v3); + asm volatile("vmsbc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(2, v3, 0x44); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xdeadbeef, 10, 0xffffffff); + VLOAD_32(v2, 4, 0xdeadbeef, 12, 0x80000000); + VLOAD_8(v0, 0x09); + VCLEAR(v3); + asm volatile("vmsbc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(3, v3, 0x04); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xdeadbeefdeadbeef); + VLOAD_64(v2, 4, 0xdeadbeefdeadbeef); + VLOAD_8(v0, 0x3); + VCLEAR(v3); + asm volatile("vmsbc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(4, v3, 0x02); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, + 0xef, 10, 0xff); + VLOAD_8(v2, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, + 12, 0x80); + asm volatile("vmsbc.vv v3, v1, v2"); + VSET(2, e8, m1); + VCMP_U8(5, v3, 0x44, 0x44); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xbeef, 10, 0xffff, 16, 0xbeef, 10, 0xffff); + VLOAD_16(v2, 4, 0xbeef, 12, 0x8000, 4, 0xbeef, 12, 0x8000); + VCLEAR(v3); + asm volatile("vmsbc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(6, v3, 0x44); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xdeadbeef, 10, 0xffffffff); + VLOAD_32(v2, 4, 0xdeadbeef, 12, 0x80000000); + VCLEAR(v3); + asm volatile("vmsbc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(7, v3, 0x04); + 
+ VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xdeadbeefdeadbeef); + VLOAD_64(v2, 4, 0xdeadbeefdeadbeef); + VCLEAR(v3); + asm volatile("vmsbc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(8, v3, 0x00); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 20; + + VSET(16, e8, m1); + VLOAD_8(v1, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25); + VLOAD_8(v0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0); + asm volatile("vmsbc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v3, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 20, 10, 30, 25, 20, 10, 30, 25); + VLOAD_16(v0, 8, 0, 0, 0, 8, 0, 0, 0); + VCLEAR(v2); + asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(10, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 20, 10, 30, 25); + VLOAD_32(v0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0); + VCLEAR(v2); + asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(11, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 20, 10); + VLOAD_64(v0, 8, 0); + VCLEAR(v2); + asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(12, v2, 0x02); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 20; + + VSET(16, e8, m1); + VLOAD_8(v1, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25); + asm volatile("vmsbc.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v3, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 20, 10, 30, 25, 20, 10, 30, 25); + VCLEAR(v2); + asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 20, 10, 30, 25); + VCLEAR(v2); + asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(15, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 20, 10); + VCLEAR(v2); + asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(16, v2, 0x02); +}; + +int 
main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c new file mode 100644 index 000000000..2f8346db3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c @@ -0,0 +1,33 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(8, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + __asm__ volatile("vmsbf.m v2, v3"); + VCMP_U8(1, v2, 7, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + VLOAD_8(v0, 3, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v2); + __asm__ volatile("vmsbf.m v2, v3, v0.t"); + VCMP_U8(2, v2, 3, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c new file mode 100644 index 000000000..824221d88 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c @@ -0,0 +1,306 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vv v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xcc, 0xcc); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v6, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0xcc, 0xcc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0xcc, 0xcc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 
0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmseq.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0xcc, 0xcc); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v1); + asm volatile("vmseq.vv v1, v2, v3, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v8, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v2); + asm volatile("vmseq.vv v2, v4, v8, v0.t"); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v4); + asm volatile("vmseq.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 
0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v8); + asm volatile("vmseq.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0x11, 0x11); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0x11, 0x11); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0x11, 0x11); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm 
volatile("vmseq.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v8, 0x00, 0x00); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmseq.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x10, 0x10); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmseq.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x10, 0x10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmseq.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x10, 0x10); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmseq.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(16, v8, 0x00, 0x00); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v1, 0x99, 0x99); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(18, v2, 0x11, 0x11); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(19, v4, 0x11, 0x11); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmseq.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(20, v8, 0x11, 0x11); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmseq.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v1, 0x10, 0x10); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmseq.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(22, v2, 0x10, 0x10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmseq.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(23, v4, 0x10, 0x10); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmseq.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(24, v8, 0x10, 0x10); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c new file mode 100644 index 000000000..27107bc4e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c @@ -0,0 +1,168 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsgt.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsgt.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsgt.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsgt.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x99, 0x99); +}; + +void TEST_CASE2(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgt.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgt.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgt.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + 
VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgt.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsgt.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsgt.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsgt.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsgt.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(12, v8, 0x99, 0x99); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v1); + asm volatile("vmsgt.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v2); + asm volatile("vmsgt.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v4); + asm volatile("vmsgt.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v8); + asm volatile("vmsgt.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0x88, 0x88); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c new file mode 100644 index 000000000..d235f2ff1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c @@ -0,0 +1,168 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsgtu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsgtu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsgtu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsgtu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x99, 0x99); +}; + +void 
TEST_CASE2(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgtu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgtu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgtu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgtu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsgtu.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0xDD, 0xDD); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsgtu.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0xDD, 0xDD); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsgtu.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0xDD, 0xDD); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsgtu.vi v8, v16, 15"); + VSET(2, 
e8, m8); + VCMP_U8(12, v8, 0xDD, 0xDD); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgtu.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0xCC, 0xCC); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgtu.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0xCC, 0xCC); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgtu.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0xCC, 0xCC); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgtu.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0xCC, 0xCC); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c new file mode 100644 index 000000000..9ea465b69 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c @@ -0,0 +1,33 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) { // unmasked vmsif.m (set-including-first)
+  VSET(8, e8, m1);
+  VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); // source mask byte 0 = 0b00001000
+  VCLEAR(v2); // 8 bytes are compared below; the op defines only mask bits 0..7
+  __asm__ volatile("vmsif.m v2, v3");
+  VCMP_U8(1, v2, 15, 0, 0, 0, 0, 0, 0, 0); // bits 0..3, first 1 included
+}
+
+void TEST_CASE2(void) { // vmsif.m under mask v0.t
+  VSET(8, e8, m1);
+  VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0);
+  VLOAD_8(v0, 11, 0, 0, 0, 0, 0, 0, 0); // 0b1011: elements 0, 1 and 3 active
+  VCLEAR(v2);
+  __asm__ volatile("vmsif.m v2, v3, v0.t");
+  VCMP_U8(2, v2, 11, 0, 0, 0, 0, 0, 0, 0); // inactive bit 2 stays cleared
+}
+
+int main(void) { // integer mask test: only the vector unit is enabled
+  INIT_CHECK();
+  enable_vec();
+  TEST_CASE1();
+  TEST_CASE2();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c
new file mode 100644
index 000000000..fc57ec628
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c
@@ -0,0 +1,237 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsle.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAB, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsle.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAB, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsle.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAB, 0xAA); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsle.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 
0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsle.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsle.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsle.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsle.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsle.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsle.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + 
VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsle.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(15, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsle.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(16, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsle.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsle.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(18, v8, 0x66, 0x66); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsle.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(19, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm 
volatile("vmsle.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(20, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(22, v8, 0x44, 0x44); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c new file mode 100644 index 000000000..af4f083e4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c @@ -0,0 +1,237 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmsleu.vv at e16, e32 and e64 over 16 elements. The 16 result bits are read back as two e8 bytes; 0xAB,0xAA is the per-element v_lhs <= v_rhs outcome packed LSB first (element 0 is the only equal pair, hence bit 0). VSET/VLOAD_*/VCLEAR/VCMP_U8 come from vector_macros.h, not visible here. */ + VSET(16, e16, m2); + VLOAD_16(v4, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsleu.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAB, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsleu.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAB, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsleu.vv v8, v16, v24"); + VSET(2, e8, m8); /* NOTE(review): the e64 checks use m8 here while the others use m1 - presumably irrelevant for a 2-byte compare; confirm. */ + VCMP_U8(3, v8, 0xAB, 0xAA); +}; + +void TEST_CASE2(void) { /* Masked vmsleu.vv (v0.t) with v0 = 0xCC,0xCC and a cleared destination. Expected 0x88 equals the unmasked outcome for these inputs (0xAA) restricted to the mask bits that are set. */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 0x88); + + VSET(16, e64, m8); + 
VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { /* Unmasked vmsleu.vx against the scalar 40 at e8, e16, e32 and e64; in every group of four inputs only 8 and 25 are <= 40, giving 0x66. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsleu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsleu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsleu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsleu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { /* Masked vmsleu.vx (v0.t, v0 = 0xCC,0xCC) against the scalar 40; expected 0x44 is the unmasked 0x66 restricted to the set mask bits. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsleu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + 
VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +void TEST_CASE5(void) { /* Unmasked vmsleu.vi with immediate 15 at e8, e16, e32 and e64; only the input 8 is <= 15 in each group of four, giving 0x22. */ + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsleu.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(15, v1, 0x22, 0x22); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsleu.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(16, v2, 0x22, 0x22); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsleu.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v4, 0x22, 0x22); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsleu.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(18, v8, 0x22, 0x22); +}; + +void TEST_CASE6(void) { /* Masked vmsleu.vi (v0.t, v0 = 0xCC,0xCC) with immediate 15; the only passing elements (bit 1 of each nibble) are masked off, so the cleared destination is expected to stay 0x00. */ + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsleu.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(19, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(20, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 
12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(22, v8, 0x00, 0x00); +}; + +int main(void) { /* Entry point: calls enable_vec() and then the six test cases in order, bracketed by INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c new file mode 100644 index 000000000..749d90f75 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c @@ -0,0 +1,163 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmslt.vv (signed less-than) at e16, e32 and e64 over 16 elements; the negative inputs -80 and -19900 are below their positive counterparts, so every odd element passes and both result bytes are 0xAA. Macros come from vector_macros.h, not visible here. */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmslt.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAA, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmslt.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAA, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmslt.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAA, 0xAA); +}; + +void TEST_CASE2(void) { /* Masked vmslt.vv (v0.t, v0 = 0xCC,0xCC) on the TEST_CASE1 inputs with a cleared destination; expected 0x88 is 0xAA restricted to the set mask bits. */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmslt.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmslt.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, 
v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmslt.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { /* Unmasked vmslt.vx against the scalar 40 at e8, e16, e32 and e64; in each group of four only -8 and -25 are below 40, giving 0x66. The e8 data uses 99 where the wider cases use 199. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmslt.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmslt.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmslt.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmslt.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { /* Masked vmslt.vx (v0.t, v0 = 0xCC,0xCC) against the scalar 40; expected 0x44 is the unmasked 0x66 restricted to the set mask bits. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmslt.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmslt.vx v2, v4, %[A], v0.t" ::[A] 
"r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmslt.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmslt.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +int main(void) { /* Entry point: calls enable_vec() and then the four test cases in order, bracketed by INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c new file mode 100644 index 000000000..6c9c7b314 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c @@ -0,0 +1,163 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmsltu.vv (unsigned less-than) at e16, e32 and e64 over 16 elements; only 80 < 7000 and 19900 < 19901 hold in each group of four, so both result bytes are 0xAA. Macros come from vector_macros.h, not visible here. */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsltu.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAA, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsltu.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAA, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsltu.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAA, 0xAA); +}; + +void TEST_CASE2(void) { /* Masked vmsltu.vv (v0.t, v0 = 0xCC,0xCC) on the TEST_CASE1 inputs with a cleared destination; expected 0x88 is 0xAA restricted to the set mask bits. */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsltu.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsltu.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 0x88); + + VSET(16, e64, 
m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsltu.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { /* Unmasked vmsltu.vx against the scalar 40 at e8, e16, e32 and e64; only 8 and 25 are below 40 in each group of four, giving 0x66. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsltu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsltu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsltu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsltu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { /* Masked vmsltu.vx (v0.t, v0 = 0xCC,0xCC) against the scalar 40; expected 0x44 is the unmasked 0x66 restricted to the set mask bits. */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsltu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsltu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, 
m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsltu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsltu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +int main(void) { /* Entry point: calls enable_vec() and then the four test cases in order, bracketed by INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c new file mode 100644 index 000000000..ddf5c0025 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c @@ -0,0 +1,306 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmsne.vv at e8, e16, e32 and e64 over 16 elements; in each group of four the first two element pairs differ and the last two are equal, so both result bytes are 0x33. Macros come from vector_macros.h, not visible here. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vv v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x33, 0x33); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v8, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vv v2, v4, v8"); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x33, 0x33); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x33, 0x33); /* Check the register the e32 compare wrote (v4). Comparing v1 would re-read the stale e8 result of check 1 and pass regardless of the e32 outcome. */ + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 
0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmsne.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x33, 0x33); +}; + +void TEST_CASE2(void) { /* Masked vmsne.vv (v0.t, v0 = 0xaa,0xaa) on the TEST_CASE1 inputs with a cleared destination; expected 0x22 is the unmasked 0x33 restricted to the set mask bits. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v1); + asm volatile("vmsne.vv v1, v2, v3, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x22, 0x22); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v6, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v2); + asm volatile("vmsne.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x22, 0x22); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v4); + asm volatile("vmsne.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x22, 0x22); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 
0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v8); + asm volatile("vmsne.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x22, 0x22); +}; + +void TEST_CASE3(void) { /* Unmasked vmsne.vx against scalar 0x00000000ffffffff. At e8, e16 and e32 the scalar's low SEW bits equal the all-ones element of each group, so only that element compares equal (0xee); at e64 no element equals the scalar (0xff). */ + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0xee, 0xee); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0xee, 0xee); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0xee, 0xee); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm 
volatile("vmsne.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(12, v8, 0xff, 0xff); +}; + +void TEST_CASE4(void) { /* Masked vmsne.vx (v0.t, v0 = 0x10,0x10, i.e. only bit 4 of each byte active). The active elements are all-ones: equal to the scalar's low bits at e8, e16 and e32 (0x00), different at e64 (0x10). */ + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmsne.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmsne.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmsne.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmsne.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0x10, 0x10); +}; + +void TEST_CASE5(void) { /* Unmasked vmsne.vi with immediate 15. At e8 both 0x0f inputs of each group equal the immediate (0x66); at wider SEW only the first element of each group equals 15 (0xee). */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v1, 0x66, 0x66); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(18, v2, 0xee, 0xee); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(19, v4, 0xee, 0xee); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmsne.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(20, v8, 0xee, 0xee); +}; + +void TEST_CASE6(void) { /* Masked vmsne.vi (v0.t, v0 = 0x10,0x10) with immediate 15; the active elements (bit 4 of each byte) all hold the value 15, so the cleared destination is expected to stay 0x00 at every SEW. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmsne.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmsne.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(22, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmsne.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(23, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmsne.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(24, v8, 0x00, 0x00); +}; + +int main(void) { /* Entry point: calls enable_vec() and then the six test cases in order, bracketed by INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c new file mode 100644 index 000000000..b5dc5aae1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c @@ -0,0 +1,33 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* Unmasked vmsof.m (set-only-first mask bit): source byte 0 is 8, i.e. only bit 3 set, and the expected result byte 0 is also 8. NOTE(review): unlike TEST_CASE2 there is no VCLEAR(v2) before the instruction, and only 8 init/expected values are supplied while VSET requests 16 elements - confirm against the VLOAD_8/VCMP_U8 definitions in vector_macros.h (not visible here). */ + VSET(16, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + __asm__ volatile("vmsof.m v2, v3"); + VCMP_U8(1, v2, 8, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE2() { /* Masked vmsof.m (v0.t) with v0 byte 0 = 3 and a cleared destination; the only set source bit sits in byte 3, so all eight compared result bytes are expected to be 0. */ + VSET(16, e8, m1); + VLOAD_8(v3, 0, 0, 0, 1, 0, 0, 0, 0); + VLOAD_8(v0, 3, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v2); + __asm__ volatile("vmsof.m v2, v3, v0.t"); + VCMP_U8(2, v2, 0, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { /* Entry point: enables the vector unit and FP, then runs both cases between INIT_CHECK() and EXIT_CHECK(). NOTE(review): enable_fp() is called although no floating-point instruction appears in this file - presumably harmless; confirm. */ + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c new file mode 100644 index 000000000..0ed9ce5cd --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vmul.vv at e8, e16, e32 and e64 over 16 elements; each expected value is the low SEW bits of the element-wise product (e.g. 0xb3 * 0x46 = 0x30f2, expected 0xf2). Macros come from vector_macros.h, not visible here. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + 0x57, 0xbc, 0x1f, 0xd5, 0x13); + VLOAD_8(v3, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + 0xb3, 0x6a, 0xae, 0x5a, 0xe1); + asm volatile("vmul.vv v1, v2, v3"); + VCMP_I8(1, v1, 0xf2, 0xfb, 0x61, 0x8c, 0x6b, 0xe3, 0xd9, 0xae, 0xa9, 0xc0, + 0x70, 0xd5, 0xd8, 0x12, 0xe2, 0xb3); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); + VLOAD_16(v6, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); + asm volatile("vmul.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x5cec, 0x18c7, 0x3440, 0x1991, 0xd904, 0xf144, 0xcb0a, + 0xf903, 0x5a15, 0x9dec, 0xa584, 0x8076, 0x6d00, 0xd853, 0x49c8, + 0xc430); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 0x07cb59c1, + 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, + 0x0a225ff0); + VLOAD_32(v12, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, + 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, + 0x5b7aedf3); + asm volatile("vmul.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x2c862540, 0x85aefa14, 0xa5ab1776, 0x3be33adc, 0x5487b397, + 0x57f7ea98, 0xc23d4af0, 0x55135668, 0xad00c308, 0x46f368be, + 0x2f640656, 0x91f63560, 0x1e952e90, 0xd18163a8, 0xf71f9638, + 0x79d240d0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, + 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, + 
0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, + 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, + 0xdda1822d4972ee47); + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, + 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, + 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, + 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, + 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, + 0xb45030fc2b4cef12); + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x90d27e278d0d0c8c, 0x5ea9d3e60b6623a6, 0x6823b3e240d3adf4, + 0xc0dcea378c760b03, 0x17692726a477bb93, 0x784c7f2ee6e87b96, + 0xd1aae9975ffa343c, 0xfdcd46ca398ccd51, 0x405f01791dce1952, + 0x16063fbe99e7d162, 0xc9d244cddacf4d00, 0x22024848323600e7, + 0xb6dfea3bb8ea8990, 0x566db9e82c5f7ebc, 0x5fc0f2db41adf67a, + 0xdaab68ca209d09fe); +}; + +void TEST_CASE2(void) { /* Masked vmul.vv (v0.t, v0 = 0xAA,0xAA) on the TEST_CASE1 inputs; the destination is cleared first, so the even-indexed (inactive) elements are expected to read 0 and the odd-indexed ones carry the TEST_CASE1 products. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + 0x57, 0xbc, 0x1f, 0xd5, 0x13); + VLOAD_8(v3, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + 0xb3, 0x6a, 0xae, 0x5a, 0xe1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmul.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0xfb, 0, 0x8c, 0, 0xe3, 0, 0xae, 0, 0xc0, 0, 0xd5, 0, 0x12, + 0, 0xb3); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); + VLOAD_16(v6, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmul.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x18c7, 0, 0x1991, 0, 0xf144, 0, 0xf903, 0, 0x9dec, 0, + 0x8076, 0, 0xd853, 0, 0xc430); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 
0x07cb59c1, + 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, + 0x0a225ff0); + VLOAD_32(v12, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, + 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, + 0x5b7aedf3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmul.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x85aefa14, 0, 0x3be33adc, 0, 0x57f7ea98, 0, 0x55135668, 0, + 0x46f368be, 0, 0x91f63560, 0, 0xd18163a8, 0, 0x79d240d0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, + 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, + 0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, + 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, + 0xdda1822d4972ee47); + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, + 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, + 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, + 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, + 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, + 0xb45030fc2b4cef12); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x5ea9d3e60b6623a6, 0, 0xc0dcea378c760b03, 0, + 0x784c7f2ee6e87b96, 0, 0xfdcd46ca398ccd51, 0, 0x16063fbe99e7d162, 0, + 0x22024848323600e7, 0, 0x566db9e82c5f7ebc, 0, 0xdaab68ca209d09fe); +}; + +void TEST_CASE3(void) { /* Unmasked vmul.vx at e8, e16, e32 and e64; the single int64_t variable 'scalar' is reassigned before each width (5, -5383, 6474219, -598189234597999223) and passed in a general-purpose register. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + 0x28, 0x6a, 0x91, 0x14, 0x4f); + int64_t scalar = 5; + asm volatile("vmul.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x80, 0x7a, 0x8e, 0x33, 0x8b, 0xb6, 0xa6, 0x9f, 0x57, 0xb3, + 0x74, 0xc8, 0x12, 0xd5, 0x64, 0x8b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 
0x3494, + 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); + scalar = -5383; + asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x7530, 0xdd7d, 0xe306, 0xcaea, 0xebf8, 0x1e20, 0x598f, + 0x6bf4, 0xa979, 0x6b78, 0xea25, 0xff35, 0x18a9, 0x2b8f, 0x2617, + 0x3912); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, + 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, + 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, + 0x45549cbc); + scalar = 6474219; + asm volatile("vmul.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xaf4a8094, 0x77dff36c, 0x44dc1ca6, 0x16e6a8c5, 0xee2546bf, + 0x78e111a5, 0x1fd15ef3, 0xe8a9a314, 0xfe2147eb, 0x5a8cf36c, + 0x5536c34a, 0xbed6ca74, 0x23eca37e, 0xe2314329, 0x6857d2e0, + 0x13b37c94); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 0xef3e81a68ae0e48c, + 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, + 0x18f2a6528629633f, 0x201b8bdb3c140400, 0x6be03c1074d46ada, + 0xcd0e6874555602d4, 0xb70264bd366ff52f, 0xc0b1fa64cec9368d, + 0x13e86249a0235941); + scalar = -598189234597999223; + asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x61ead1213f09307c, 0x7d03f4c84c5e86fd, 0x4aa0acc4e01fa112, + 0x77bc957fdeec0c60, 0x762b14c112e60229, 0xbac65562e2366aec, + 0xcc243dd1e80ab1be, 0xa871135122a1c220, 0x3d0db00992575a60, + 0x8b07763affcd8fb7, 0xc8377fc475ea2400, 0xd8827c839711c0aa, + 0x5a5779ec4f334774, 0x243fd844e74ed927, 0x49204b7602871a75, + 0x33ae5ea6d335d0c9); +}; + +void TEST_CASE4(void) { /* Masked vmul.vx (v0.t, v0 = 0xAA,0xAA) with the TEST_CASE3 inputs and scalars; the destination is cleared first, so even-indexed elements are expected to read 0 and odd-indexed ones carry the TEST_CASE3 products. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + 0x28, 0x6a, 0x91, 0x14, 0x4f); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmul.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x7a, 0, 0x33, 0, 0xb6, 0, 0x9f, 0, 
0xb3, 0, 0xc8, 0, 0xd5, + 0, 0x8b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, + 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xdd7d, 0, 0xcaea, 0, 0x1e20, 0, 0x6bf4, 0, 0x6b78, 0, + 0xff35, 0, 0x2b8f, 0, 0x3912); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, + 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, + 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, + 0x45549cbc); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmul.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x77dff36c, 0, 0x16e6a8c5, 0, 0x78e111a5, 0, 0xe8a9a314, + 0, 0x5a8cf36c, 0, 0xbed6ca74, 0, 0xe2314329, 0, 0x13b37c94); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 0xef3e81a68ae0e48c, + 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, + 0x18f2a6528629633f, 0x201b8bdb3c140400, 0x6be03c1074d46ada, + 0xcd0e6874555602d4, 0xb70264bd366ff52f, 0xc0b1fa64cec9368d, + 0x13e86249a0235941); + scalar = -598189234597999223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x7d03f4c84c5e86fd, 0, 0x77bc957fdeec0c60, 0, + 0xbac65562e2366aec, 0, 0xa871135122a1c220, 0, 0x8b07763affcd8fb7, 0, + 0xd8827c839711c0aa, 0, 0x243fd844e74ed927, 0, 0x33ae5ea6d335d0c9); +}; + +int main(void) { /* Entry point: calls enable_vec() and then the four test cases in order, bracketed by INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c new file mode 100644 index 000000000..5ee5b94ef --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + 0xf6, 0x27, 0x57, 0x4f, 0xef); + VLOAD_8(v3, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + 0x07, 0x8b, 0x2e, 0x9e, 0x5f); + asm volatile("vmulh.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x01, 0xff, 0x1c, 0x00, 0xf8, 0x00, 0xf0, 0xfa, 0x02, 0xef, + 0xfb, 0xff, 0xee, 0x0f, 0xe1, 0xf9); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); + VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 0x8dbf, 0x3a0a); + asm volatile("vmulh.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x331f, 0xd74c, 0x11d1, 0xd497, 0x2c1c, 0xfeff, 0xe8b2, + 0x1f2b, 0xe705, 0xcc4b, 0xf191, 0x0687, 0x219b, 0x0b0f, 0xe490, + 0x0a1a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, + 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, + 0x62d35f88); + VLOAD_32(v12, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, + 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, + 0xd98c6d7f); + asm volatile("vmulh.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x0a83425c, 0xf40e8502, 0xf6419389, 0x209df360, 0xf27b2982, + 0xfb750aac, 0x02491ecb, 0xf87fe57b, 0xf164b493, 0xf3a433c8, + 0x0fa089bb, 0x22c2e9f3, 0x00de5543, 0xd0bbf2cc, 0xf772a985, + 0xf128024f); + + VSET(16, e64, m8); + VLOAD_64(v16, 
0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, + 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, + 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, + 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, + 0x06ae542295f32453); + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, + 0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, + 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, + 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, + 0x6106ad88eabfc3e2); + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x164eafe1cab0639c, 0xf096db86d4d06824, 0x033fc2aecddc0dd7, + 0xf68905ef31703000, 0xf1aaea11162383ae, 0x0cdf24ba4cf3be38, + 0x1711cb1d2f008de9, 0xed52dbcaa3de5ca2, 0x1ffe218cf60b6bf9, + 0x174f95d97aff7bf9, 0x3041b22ecc97909a, 0x063ead2a7756c9da, + 0xf7a6973e6c9e5ce6, 0x0092485623082173, 0x0935add704f8cec8, + 0x02883a7e75391040); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + 0xf6, 0x27, 0x57, 0x4f, 0xef); + VLOAD_8(v3, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + 0x07, 0x8b, 0x2e, 0x9e, 0x5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulh.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0xff, 0, 0x00, 0, 0x00, 0, 0xfa, 0, 0xef, 0, 0xff, 0, 0x0f, + 0, 0xf9); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); + VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 0x8dbf, 0x3a0a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulh.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0xd74c, 0, 0xd497, 0, 0xfeff, 0, 0x1f2b, 0, 0xcc4b, 0, 
+ 0x0687, 0, 0x0b0f, 0, 0x0a1a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, + 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, + 0x62d35f88); + VLOAD_32(v12, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, + 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, + 0xd98c6d7f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulh.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0xf40e8502, 0, 0x209df360, 0, 0xfb750aac, 0, 0xf87fe57b, 0, + 0xf3a433c8, 0, 0x22c2e9f3, 0, 0xd0bbf2cc, 0, 0xf128024f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, + 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, + 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, + 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, + 0x06ae542295f32453); + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, + 0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, + 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, + 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, + 0x6106ad88eabfc3e2); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xf096db86d4d06824, 0, 0xf68905ef31703000, 0, + 0x0cdf24ba4cf3be38, 0, 0xed52dbcaa3de5ca2, 0, 0x174f95d97aff7bf9, 0, + 0x063ead2a7756c9da, 0, 0x0092485623082173, 0, 0x02883a7e75391040); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + 0x94, 0x95, 0xf6, 0xd4, 0xbd); + int64_t scalar = 5; + asm volatile("vmulh.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0xfd, 0xff, 
0xff, 0x01, 0x01, 0x00, 0xfe, 0xff, 0x00, 0xfd, + 0x02, 0xfd, 0xfd, 0xff, 0xff, 0xfe); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); + scalar = -5383; + asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0xfb1a, 0xf632, 0xffbf, 0x07ef, 0xfe5d, 0x0947, 0x0047, + 0xfb4e, 0xfb36, 0xf8b6, 0x05c1, 0x077b, 0xf9c8, 0xf8eb, 0x07f1, + 0x057a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, + 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, + 0xb2b01a61); + scalar = 6474219; + asm volatile("vmulh.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x003100de, 0xfff20329, 0x0004fe25, 0xffe7ec74, 0x00314160, + 0x0017be6b, 0x00084c30, 0x002f7b6f, 0x001c0ff7, 0xffe1082b, + 0xfff6972b, 0xfff5fb91, 0x000c49c4, 0xfff8db9d, 0x000283ec, + 0xffe22a6f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, + 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, + 0x90c1615935c1abd0, 0xf5b41f9a0a988065, 0xb09790bdcbecee7b, + 0x1d9da4f87df33b54, 0xe347aadb53bdc879, 0x7a39a7269cbae2a7, + 0x422ed2952e246f26); + scalar = -598189234597999223; + asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0xfdd4850b300f6008, 0x01571f899f226d57, 0x001b0534decdc9a2, + 0xfd6994f8de6e51aa, 0xfde1f73873e6758a, 0xffe28b043b9b8971, + 0x006de7f819baba3d, 0x0098b57f65f599e1, 0x01e62040839e971b, + 0x039b807e6f36fd81, 0x005579d7e0c206af, 0x0293356120e5cee9, + 0xff0a24a69d9af87b, 0x00ee6bb505683322, 0xfc095797a6198143, + 0xfdda946cdb2f169c); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + 0x94, 0x95, 0xf6, 0xd4, 0xbd); + 
int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulh.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0xff, 0, 0x01, 0, 0x00, 0, 0xff, 0, 0xfd, 0, 0xfd, 0, 0xff, + 0, 0xfe); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xf632, 0, 0x07ef, 0, 0x0947, 0, 0xfb4e, 0, 0xf8b6, 0, + 0x077b, 0, 0xf8eb, 0, 0x057a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, + 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, + 0xb2b01a61); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulh.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0xfff20329, 0, 0xffe7ec74, 0, 0x0017be6b, 0, 0x002f7b6f, + 0, 0xffe1082b, 0, 0xfff5fb91, 0, 0xfff8db9d, 0, 0xffe22a6f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, + 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, + 0x90c1615935c1abd0, 0xf5b41f9a0a988065, 0xb09790bdcbecee7b, + 0x1d9da4f87df33b54, 0xe347aadb53bdc879, 0x7a39a7269cbae2a7, + 0x422ed2952e246f26); + scalar = -598189234597999223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x01571f899f226d57, 0, 0xfd6994f8de6e51aa, 0, + 0xffe28b043b9b8971, 0, 0x0098b57f65f599e1, 0, 0x039b807e6f36fd81, 0, + 0x0293356120e5cee9, 0, 0x00ee6bb505683322, 0, 0xfdda946cdb2f169c); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c new file mode 100644 index 000000000..1bc3cc448 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + 0x35, 0xfc, 0x70, 0x33, 0xe4); + VLOAD_8(v3, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + 0xa8, 0xbb, 0xc6, 0x65, 0x81); + asm volatile("vmulhsu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x37, 0x0b, 0xe9, 0xec, 0x28, 0x00, 0x20, 0xf8, 0xe2, 0xd1, + 0xbb, 0x22, 0xfd, 0x56, 0x14, 0xf1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); + VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + 0xdda1, 0xb5d7, 0xafbc, 0xc74f, 0xab45, 0x986f, 0xf0f2, 0xcf3c); + asm volatile("vmulhsu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x0726, 0x2e28, 0xe5c6, 0xffaf, 0x4604, 0x199f, 0x094f, + 0xac6d, 0x50b3, 0xe262, 0xef3a, 0xe37f, 0xfc6b, 0xd04d, 0xf58f, + 0xeae2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, + 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, + 0x94bc3eb4); + VLOAD_32(v12, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, + 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, + 0xe7468a97); + asm volatile("vmulhsu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0xf49d2cff, 0xfa9a5a26, 0x2444f976, 0xa25f8c94, 0xef3f26f6, 
+ 0xec6d24a0, 0xd0a728d5, 0x361265a6, 0x9ebdaf85, 0x3e1cc92b, + 0x30e004f5, 0x244d4baf, 0xdda8d640, 0xf8612ea2, 0x1635f870, + 0x9f184dfb); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, + 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, + 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, + 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, + 0x406879d908a80b41); + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 0x6b2fb3e49bd48e69, + 0x084244757fba5561, 0xf2d5b41ee89411fa, 0x8585111aaee16c07, + 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, + 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, + 0xb84832df19fc19b6); + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x23e3020d5d8e40d8, 0x0da067e42d62fa2a, 0xe17ee107c3fdd97f, + 0x2ffdce53a7ef7aa4, 0x00aadc600f6180bd, 0xf5b9cd660e9f294b, + 0x01bf419feafa3fe5, 0x174a979243e0945b, 0xce6e38c0508aba17, + 0xc342fb3a620dde75, 0xf593ff8eafcca075, 0xfe519de4c807844e, + 0x019ec3149daf2fc0, 0x2289f5738e0e6d23, 0x048dafe18fe3288b, + 0x2e5d41c2cc9b604f); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + 0x35, 0xfc, 0x70, 0x33, 0xe4); + VLOAD_8(v3, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + 0xa8, 0xbb, 0xc6, 0x65, 0x81); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhsu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x0b, 0, 0xec, 0, 0x00, 0, 0xf8, 0, 0xd1, 0, 0x22, 0, 0x56, + 0, 0xf1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); + VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + 0xdda1, 0xb5d7, 0xafbc, 0xc74f, 0xab45, 
0x986f, 0xf0f2, 0xcf3c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhsu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x2e28, 0, 0xffaf, 0, 0x199f, 0, 0xac6d, 0, 0xe262, 0, + 0xe37f, 0, 0xd04d, 0, 0xeae2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, + 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, + 0x94bc3eb4); + VLOAD_32(v12, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, + 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, + 0xe7468a97); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhsu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0xfa9a5a26, 0, 0xa25f8c94, 0, 0xec6d24a0, 0, 0x361265a6, 0, + 0x3e1cc92b, 0, 0x244d4baf, 0, 0xf8612ea2, 0, 0x9f184dfb); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, + 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, + 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, + 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, + 0x406879d908a80b41); + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 0x6b2fb3e49bd48e69, + 0x084244757fba5561, 0xf2d5b41ee89411fa, 0x8585111aaee16c07, + 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, + 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, + 0xb84832df19fc19b6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0da067e42d62fa2a, 0, 0x2ffdce53a7ef7aa4, 0, + 0xf5b9cd660e9f294b, 0, 0x174a979243e0945b, 0, 0xc342fb3a620dde75, 0, + 0xfe519de4c807844e, 0, 0x2289f5738e0e6d23, 0, 0x2e5d41c2cc9b604f); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x21, 0x87, 
0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + 0xa4, 0xdc, 0x4f, 0xc3, 0x55); + uint64_t scalar = 5; + asm volatile("vmulhsu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x00, 0xfd, 0xfe, 0xfe, 0x02, 0x02, 0x02, 0x02, 0x02, 0xfd, + 0x01, 0xfe, 0xff, 0x01, 0xfe, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); + scalar = 816; + asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x013a, 0xff3a, 0xff05, 0x001c, 0x0015, 0x005d, 0x0172, + 0xfff5, 0x00b7, 0xff1e, 0x00d6, 0x0173, 0x011b, 0x005b, 0xfe7d, + 0x0069); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + 0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, + 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, + 0x58218f7f); + scalar = 7389998; + asm volatile("vmulhsu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xffd92575, 0xffde51c5, 0xffe1831f, 0x00025357, 0x000b6288, + 0xffe0de52, 0xffd5205d, 0xffd4ee51, 0x0024059f, 0xffd85637, + 0x002c7ed9, 0xffce00ba, 0x003446bb, 0x0004b63d, 0xffd7455b, + 0x0026d1e0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, + 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, + 0x7b6dddfcd86d6737, 0x02b8177716c29a3e, 0x11220f42ce0594b4, + 0x8382e0c79caa1e6c, 0x0d1593d36c1dc00e, 0x9f8eb889cc8e98c6, + 0x37411f40369680d2); + scalar = 321156886679781445; + asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x012e0fe6705cf26d, 0x01f63e6c65840868, 0xfe4b3a837bcf749f, + 0x01eefe6ad67c584e, 0x00225d3cec11ae29, 0x009d50942207fb0e, + 0x00a6abfb9cc735df, 0xfe25d8c13270b026, 0xffc9cb59c445c91a, + 0x02261e05ece3e474, 0x000c1e7198242fc2, 0x004c5c6f8fd9129e, + 0xfdd529022c30504c, 0x003a50e3baabab1e, 
0xfe522930314d6d7d, + 0x00f6440fa9591c62); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + 0xa4, 0xdc, 0x4f, 0xc3, 0x55); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhsu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0xfd, 0, 0xfe, 0, 0x02, 0, 0x02, 0, 0xfd, 0, 0xfe, 0, 0x01, + 0, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); + scalar = 816; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xff3a, 0, 0x001c, 0, 0x005d, 0, 0xfff5, 0, 0xff1e, 0, + 0x0173, 0, 0x005b, 0, 0x0069); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + 0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, + 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, + 0x58218f7f); + scalar = 7389998; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhsu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0xffde51c5, 0, 0x00025357, 0, 0xffe0de52, 0, 0xffd4ee51, + 0, 0xffd85637, 0, 0xffce00ba, 0, 0x0004b63d, 0, 0x0026d1e0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, + 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, + 0x7b6dddfcd86d6737, 0x02b8177716c29a3e, 0x11220f42ce0594b4, + 0x8382e0c79caa1e6c, 0x0d1593d36c1dc00e, 0x9f8eb889cc8e98c6, + 0x37411f40369680d2); + scalar = 321156886679781445; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x01f63e6c65840868, 0, 0x01eefe6ad67c584e, 0, + 0x009d50942207fb0e, 0, 0xfe25d8c13270b026, 0, 0x02261e05ece3e474, 0, + 
0x004c5c6f8fd9129e, 0, 0x003a50e3baabab1e, 0, 0x00f6440fa9591c62); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c new file mode 100644 index 000000000..bfcc137ea --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + 0xde, 0x87, 0xcb, 0x7a, 0x83); + VLOAD_8(v3, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); + asm volatile("vmulhu.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x13, 0x9a, 0xc7, 0x04, 0x0d, 0x28, 0x02, 0x14, 0x96, 0x03, + 0x52, 0xc4, 0x71, 0x71, 0x4c, 0x06); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, + 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); + VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 0x44f3, 0x27fe, 0x8412, 0xcda0); + asm volatile("vmulhu.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x47f6, 0x016b, 0x1df0, 0x7b4d, 0xc8a5, 0x2364, 0x1674, + 0x5f6b, 0x8d38, 0x0251, 0x28c4, 0x0c3f, 0x31cd, 0x09e5, 0x5237, + 0x8574); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, + 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, + 0xead33cf0); + VLOAD_32(v12, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + 0x02a39cfa, 0xf7595181, 0x0c230035, 
0x86cf9ea9, 0x0f66ddd3, + 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, + 0xccc8626a); + asm volatile("vmulhu.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x3b858c79, 0x0a3386a5, 0x55117fe4, 0x664a7ee4, 0x3b2f618b, + 0x00467bcb, 0xbbfd8432, 0x07f5e895, 0x3093e98c, 0x06c6dd00, + 0x01b49139, 0x18debc33, 0x024b3af0, 0x7d70f100, 0x4dbd9bdf, + 0xbbd823dc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, + 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, + 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, + 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, + 0x8a7621ad8d6f104a); + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, + 0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, + 0x1d92c5117b87a392, 0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, + 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, + 0xff93a562f06d3641); + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x29e7e403b1955330, 0x015742ce71e2c757, 0x08609392d9402e03, + 0x2165dabfb788d03d, 0x553f1a1e61409141, 0x0eb728a66479b5fb, + 0x2c125410c5448322, 0x0357b1cf05241ad9, 0x3c20a893e10635bb, + 0x0e8895d7f39e953c, 0x083d3ee38137c9b0, 0x3335fb506009220b, + 0x2df4dacbb013b6b0, 0x589d920140d7dd8c, 0x2683eed8bb77fa43, + 0x8a3b86d4dd8169cf); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + 0xde, 0x87, 0xcb, 0x7a, 0x83); + VLOAD_8(v3, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhu.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0, 0x9a, 0, 0x04, 0, 0x28, 0, 0x14, 0, 0x03, 0, 0xc4, 0, 0x71, + 0, 0x06); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 
0xe141, 0x69e6, 0x4133, 0xf783, + 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); + VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 0x44f3, 0x27fe, 0x8412, 0xcda0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhu.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0, 0x016b, 0, 0x7b4d, 0, 0x2364, 0, 0x5f6b, 0, 0x0251, 0, + 0x0c3f, 0, 0x09e5, 0, 0x8574); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, + 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, + 0xead33cf0); + VLOAD_32(v12, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + 0x02a39cfa, 0xf7595181, 0x0c230035, 0x86cf9ea9, 0x0f66ddd3, + 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, + 0xccc8626a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhu.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0, 0x0a3386a5, 0, 0x664a7ee4, 0, 0x00467bcb, 0, 0x07f5e895, 0, + 0x06c6dd00, 0, 0x18debc33, 0, 0x7d70f100, 0, 0xbbd823dc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, + 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, + 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, + 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, + 0x8a7621ad8d6f104a); + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, + 0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, + 0x1d92c5117b87a392, 0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, + 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, + 0xff93a562f06d3641); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0, 0x015742ce71e2c757, 0, 0x2165dabfb788d03d, 0, + 
0x0eb728a66479b5fb, 0, 0x0357b1cf05241ad9, 0, 0x0e8895d7f39e953c, 0, + 0x3335fb506009220b, 0, 0x589d920140d7dd8c, 0, 0x8a3b86d4dd8169cf); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + 0xc2, 0x5f, 0xa3, 0x7c, 0xca); + uint64_t scalar = 5; + asm volatile("vmulhu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x00, 0x03, 0x02, 0x00, + 0x00, 0x03, 0x01, 0x03, 0x02, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); + scalar = 816; + asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x00fa, 0x032a, 0x003d, 0x0158, 0x00b5, 0x0130, 0x012c, + 0x0315, 0x0137, 0x0020, 0x0329, 0x009c, 0x0119, 0x0255, 0x00d5, + 0x011b); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + 0xd75f62df, 0x3002ed42, 0x74269b1d, 0xc77bc0dd, 0x36f2552d, + 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, + 0xf41b555a); + scalar = 7389998; + asm volatile("vmulhu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x00099748, 0x0005eb5c, 0x001b1e60, 0x00226562, 0x00563815, + 0x005eddef, 0x001525e2, 0x00332972, 0x0057de3b, 0x001833e9, + 0x0032161d, 0x0001491b, 0x003551d9, 0x00482775, 0x003f3ca7, + 0x006b8612); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, + 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, + 0x6129247d49c42f4b, 0x3d9ee22336a4e216, 0x3c9b9d533797be90, + 0x0c0c54042a20ddc8, 0xf309bda968a3a583, 0x550697570a1e9645, + 0x5beaf5933973231f); + scalar = 321156886679781445; + asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x029d61da2f470da8, 0x004d882170361dd2, 0x03ecbc09716942cd, + 0x013138661b0ea1a1, 
0x032bd162449d3f20, 0x01a25fd52874e6a2, + 0x043b51fe85cf352c, 0x02ba6ebb77802a7c, 0x00d98a5bba81dc57, + 0x01b10a47f99f8c44, 0x0112a3d22e03b6e9, 0x010e20461059ad6b, + 0x0035b2b00a44dfe2, 0x043b352e6dc32a00, 0x017af48bc4f5ad70, + 0x0199ac3dc8053978); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + 0xc2, 0x5f, 0xa3, 0x7c, 0xca); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0, 0x01, 0, 0x01, 0, 0x01, 0, 0x03, 0, 0x00, 0, 0x03, 0, 0x03, + 0, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); + scalar = 816; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0, 0x032a, 0, 0x0158, 0, 0x0130, 0, 0x0315, 0, 0x0020, 0, + 0x009c, 0, 0x0255, 0, 0x011b); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + 0xd75f62df, 0x3002ed42, 0x74269b1d, 0xc77bc0dd, 0x36f2552d, + 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, + 0xf41b555a); + scalar = 7389998; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0, 0x0005eb5c, 0, 0x00226562, 0, 0x005eddef, 0, 0x00332972, + 0, 0x001833e9, 0, 0x0001491b, 0, 0x00482775, 0, 0x006b8612); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, + 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, + 0x6129247d49c42f4b, 0x3d9ee22336a4e216, 0x3c9b9d533797be90, + 0x0c0c54042a20ddc8, 0xf309bda968a3a583, 0x550697570a1e9645, + 0x5beaf5933973231f); + scalar = 321156886679781445; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0, 0x004d882170361dd2, 0, 0x013138661b0ea1a1, 0, + 0x01a25fd52874e6a2, 0, 0x02ba6ebb77802a7c, 0, 0x01b10a47f99f8c44, 0, + 0x010e20461059ad6b, 0, 0x043b352e6dc32a00, 0, 0x0199ac3dc8053978); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c new file mode 100644 index 000000000..94d99133a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c @@ -0,0 +1,104 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { // vmv.v.v: copy a source group, then check the destination + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.v v3, v1"); + VCMP_U8(1, v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v6, v2"); + VCMP_U16(2, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v12, v4"); // v12 is the destination to check + VCMP_U32(3, v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v24, v8"); + VCMP_U64(4, v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +void TEST_CASE2() { + const uint64_t scalar = 0x00000000deadbeef; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7,
8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v3, %[A]" ::[A] "r"(scalar)); + VCMP_U8(5, v3, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(6, v4, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(7, v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v3, -9"); + VCMP_U8(9, v3, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, + -9); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v4, -10"); + VCMP_U16(10, v4, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + -10, -10, -10, -10); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v8, -11"); + VCMP_U32(11, v8, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, + -11, -11, -11, -11); + + 
VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -12"); + VCMP_U64(12, v16, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c new file mode 100644 index 000000000..634172c9e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c @@ -0,0 +1,146 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vl1r and vs1r + +#include "vector_macros.h" + +uint64_t counter; + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint8_t buf_vec_8b[VLEN]; + +/////////// +// vmv1r // +/////////// + +// 1 whole register move +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv1r.v v1, v16"); + // Store it back and check that the whole register was moved + asm volatile("vs1r.v v1, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); +} + +/////////// +// vmv2r // +/////////// + +// 2 whole registers move +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Reserve space 
for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv2r.v v2, v16"); + // Store it back and check that both registers were moved + asm volatile("vs2r.v v2, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, gold_vec_8b, VLEN / 4); +} + +/////////// +// vmv4r // +/////////// + +// 4 whole registers move +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv4r.v v4, v16"); + // Store it back and check that all four registers were moved + asm volatile("vs4r.v v4, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, gold_vec_8b, VLEN / 2); +} + +/////////// +// vmv8r // +/////////// + +// 8 whole registers move +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv8r.v v8, v16"); + // Store it back and check that all eight registers were moved + asm volatile("vs8r.v v8, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, 
gold_vec_8b, VLEN); +} + +//////////// +// Others // +//////////// + +// Check with initial vl == 0 +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Request vl == 0: the full-register compare below must still pass + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv1r.v v1, v16"); + // Store it back and check that the whole register was moved + asm volatile("vs1r.v v1, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c new file mode 100644 index 000000000..11dfda779 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c @@ -0,0 +1,75 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +int8_t scalar_8b; +int16_t scalar_16b; +int32_t scalar_32b; +int64_t scalar_64b; + +void TEST_CASE1() { + scalar_8b = 55 << 0; + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_8b)); + VCMP_I8(1, v1, scalar_8b); + + scalar_16b = 55 << 8; + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v2, %0" ::"r"(scalar_16b)); + VCMP_I16(2, v2, scalar_16b); + + scalar_32b = 55 << 16; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v4, %0" ::"r"(scalar_32b)); + VCMP_I32(3, v4, scalar_32b); + + scalar_64b = 55 << 32; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v8, %0" ::"r"(scalar_64b)); + VCMP_I64(4, v8, scalar_64b); +} + +// Check special cases +void TEST_CASE2() { + scalar_64b = 55 << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(5, v1, scalar_64b); + + scalar_64b = 55 << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(6, v1, 1); + + scalar_64b = 55 << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(7, v1, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c new file mode 100644 index 000000000..2561c5b89 --- 
/dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c @@ -0,0 +1,72 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +int8_t scalar_8b; +int16_t scalar_16b; +int32_t scalar_32b; +int64_t scalar_64b; + +void TEST_CASE1() { /* vmv.x.s: read element 0 into a scalar at each SEW */ + scalar_8b = 0; + VSET(16, e8, m1); + VLOAD_8(v1, 55 << 0, 22, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_8b)); + XCMP(1, scalar_8b, 55 << 0); + + scalar_16b = 0; + VSET(16, e16, m2); + VLOAD_16(v2, 55 << 8, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v2" : "=r"(scalar_16b)); + XCMP(2, scalar_16b, 55 << 8); + + scalar_32b = 0; + VSET(16, e32, m4); + VLOAD_32(v4, 55 << 16, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v4" : "=r"(scalar_32b)); + XCMP(3, scalar_32b, 55 << 16); + + scalar_64b = 0; + VSET(16, e64, m8); + VLOAD_64(v8, (int64_t)55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v8" : "=r"(scalar_64b)); + XCMP(4, scalar_64b, (int64_t)55 << 30); /* shift in 64 bits, not int */ +} + +// Special cases: vs2 = v1 under LMUL=8, then reads issued after VSET_ZERO +void TEST_CASE2() { + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, (int64_t)55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(5, scalar_64b, (int64_t)55 << 30); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, (int64_t)55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(6, scalar_64b, (int64_t)55 << 30); /* element 0 is still returned */ + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, (int64_t)55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(7, scalar_64b, (int64_t)55 << 30); /* element 0 is still returned */ +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + 
EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c new file mode 100644 index 000000000..fa0998a8d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* v1 = ~(v2 ^ v3) over 16 mask bits */ + VSET(16, e8, m1); /* NOTE(review): vl 16 vs. 2 bytes given to VLOAD_8 - confirm */ + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xB6, 0x31); +} + +void TEST_CASE2() { /* XNOR with all ones returns v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { /* XNOR with zero inverts v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0x32, 0x10); +} + +void TEST_CASE4() { /* mixed nibble pattern in v3 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0x3D, 0xE0); +} + +void TEST_CASE5() { /* vl = 13: bits 13..15 keep the preloaded 1s (0x31 -> 0xF1) */ + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0xB6, 0xF1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c new file mode 100644 index 000000000..6a561d752 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* v1 = v2 ^ v3 over 16 mask bits */ + VSET(16, e8, m1); /* NOTE(review): vl 16 vs. 2 bytes given to VLOAD_8 - confirm */ + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x49, 0xCE); +} + +void TEST_CASE2() { /* XOR with all ones inverts v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0x32, 0x10); +} + +void TEST_CASE3() { /* XOR with zero returns v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { /* mixed nibble pattern in v3 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xC2, 0x1F); +} + +void TEST_CASE5() { /* vl = 13: bits 13..15 keep the preloaded 1s (0xCE -> 0xEE) */ + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x49, 0xEE); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c new file mode 100644 index 000000000..f66136ec6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Muhammad Ijaz + +#include "vector_macros.h" + +void TEST_CASE1() { /* vnclip.wv: signed 16 -> 8 bit narrowing, shift by 7 */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wv v1, v2, v4"); + VCMP_I8(1, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE2() { /* masked .wv: inactive elements keep the cleared 0 */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wv v1, v2, v4, v0.t"); + VCMP_I8(2, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE3() { /* vnclip.wx: shift amount from a scalar register */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(3, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE4() { /* masked .wx */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + VLOAD_8(v0, 0x5, 0, 0, 0); /* vl is 4: supply 4 bytes like cases 2 and 6 */ + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(4, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE5() { /* vnclip.wi: shift amount from an immediate */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wi v1, v2, 7"); + VCMP_I8(5, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE6() { /* masked .wi */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wi v1, v2, 7, v0.t"); + VCMP_I8(6, v1, 6, 0, 0xff, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c new file mode 100644 index 000000000..e98ac0889 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c @@ -0,0 +1,78 @@ +// 
Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Muhammad Ijaz + +#include "vector_macros.h" + +void TEST_CASE1() { /* vnclipu.wv: unsigned 16 -> 8 bit narrowing, shift by 7 */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wv v1, v2, v4"); + VCMP_U8(1, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE2() { /* masked .wv: inactive elements keep the cleared 0 */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wv v1, v2, v4, v0.t"); + VCMP_U8(2, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE3() { /* vnclipu.wx: shift amount from a scalar register */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(3, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE4() { /* masked .wx */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + VLOAD_8(v0, 0x5, 0, 0, 0); /* vl is 4: supply 4 bytes like cases 2 and 6 */ + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(4, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE5() { /* vnclipu.wi: shift amount from an immediate */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wi v1, v2, 7"); + VCMP_U8(5, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE6() { /* masked .wi */ + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wi v1, v2, 7, v0.t"); + VCMP_U8(6, v1, 6, 0, 0xff, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c new file mode 
100644 index 000000000..7c13461a5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* vnmsac.vv, unmasked: vd -= vs1 * vs2 (mod 2^SEW) */ + VSET(16, e8, m1); + VLOAD_8(v3, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v1, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + asm volatile("vnmsac.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + asm volatile("vnmsac.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 
0x4224aa5e); + VLOAD_32(v4, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + asm volatile("vnmsac.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, + 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, + 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, + 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, + 0x5f9f0ac7744ba745, 0xba1bbac1969a4055, 0x275025160493a4fa, + 0x21c02801006747a0); +} + +void TEST_CASE2() { /* masked .vv, v0 = 0xAAAA: only odd elements update */ + VSET(16, e8, m1); + VLOAD_8(v3, 
0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v1, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x53, 0xb4, 0x2c, 0x04, 0x4a, 0x53, 0xa3, 0x87, 0x7e, 0xe2, + 0x4c, 0xf6, 0x2e, 0x31, 0x13, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xb917, 0xdadf, 0x0f0c, 0x5c17, 0xe0b6, 0x4091, 0x5c69, + 0x7c52, 0x3588, 0xca83, 0x65d9, 0x4600, 0xfbff, 0x87f1, 0x34a4, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v4, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x3a582428, 0x92cacfc8, 0xb445799b, 
0x6ff2f953, 0x51a7fe9e, + 0x466feefe, 0xfb0a701b, 0xdf6d912b, 0xd10c9064, 0x0a7ec6a5, + 0xbb1779fd, 0x91713c70, 0xd04c0f6c, 0xb3a6341a, 0x90a09dc8, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xc265b2d19ad92bbb, 0x68b16397da83d642, 0xe490f5981f64a313, + 0x084dc189ec3eea39, 0xc475df4b52276fe9, 0xc713321c491334ae, + 0x8dd5189f3a66f166, 0x3d84da5e7dab4cd1, 0x7b74d167bd1b22fd, + 0x91e8df24708208d6, 0x0ab0c3f0f7ddeb66, 0x2f1a889653a2c559, + 0x84d4df4b2a3768c7, 0xba1bbac1969a4055, 0x79320b8da693a84e, + 0x21c02801006747a0); +} + +void TEST_CASE3() { /* vnmsac.vx, unmasked: vd -= scalar * vs2 (mod 2^SEW) */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v3, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v1, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + asm volatile("vnmsac.vx v1, %[A], v3" ::[A] 
"r"(scalar)); + VCMP_U8(9, v1, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, + 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + asm volatile("vnmsac.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v4, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + asm volatile("vnmsac.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + 0x2f0a66ed, 0x59240bcf, 0x915f2166, 0x8c4ace02, 0x802d8981, + 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 
0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + asm volatile("vnmsac.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, 0x983ca4c97f204385, + 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, + 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, + 0xa1c7ff743113d8bf, 0xe8198a4799a97a9a, 0x5ccf06fd8751eb9d, + 0xa36557d05e8802dc, 0x10aae67f31dc2b4f, 0xe878939fd1287553, + 0x594538a8571dbf06); +} + +void TEST_CASE4() { /* masked .vx, v0 = 0xAAAA: only odd elements update */ + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v3, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v1, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v1, %[A], v3, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xfa, 0x10, 0x2a, 0xab, 0xe7, 0xf3, 0x8c, 0x13, 0x40, 0xcf, + 0x50, 0x4f, 0xe0, 0xda, 0x1f, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xe886, 0xd169, 0x1857, 0xfb44, 0x522e, 0x3f75, 0xa6c2, + 0x86fd, 0xd024, 0xd0d7, 0xdd99, 0x5a64, 0xf00a, 0x60c3, 0x79a5, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v4, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 
0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xcbd8537e, 0x9dfe886c, 0x60cf8444, 0xddd6bece, 0x8f2a8694, + 0x2f0a66ed, 0x1ef799f5, 0x915f2166, 0x4bfd5a25, 0x802d8981, + 0xed89a52a, 0xa3f70986, 0x872392b9, 0xa4fbf240, 0x865c7264, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x3b0c6a3a651beecc, 0x79abe3a30a816ca0, 0xda94340ac428ca78, + 0x59a9303f04932768, 0x87df3c47c8113e43, 0x59a15b1bb66f16c2, + 0x358706b57ce6d6c7, 0x65e0c3ab56fa1f0c, 0xe9ffed5b39f1ea1d, + 0xa1c7ff743113d8bf, 0x5d2ea63ac910e42e, 0x5ccf06fd8751eb9d, + 0x05d366b426005f47, 0x10aae67f31dc2b4f, 0x023bbf8109263e1d, + 0x594538a8571dbf06); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c new file mode 100644 index 000000000..14739529a --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v3, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + asm volatile("vnmsub.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + asm volatile("vnmsub.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v4, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v12, 0x3a582428, 0x61c55f94, 
0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + asm volatile("vnmsub.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, + 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + asm volatile("vnmsub.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, + 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, + 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, + 0x5f9f0ac7744ba745, 0xba1bbac1969a4055, 0x275025160493a4fa, + 0x21c02801006747a0); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 
0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v3, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x41, 0xb4, 0xd0, 0x04, 0xc4, 0x53, 0x91, 0x87, 0x7b, 0xe2, + 0x85, 0xf6, 0x2b, 0x31, 0x33, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0x9904, 0xdadf, 0xa6cb, 0x5c17, 0x227e, 0x4091, 0x3eeb, + 0x7c52, 0x14a1, 0xca83, 0x3376, 0x4600, 0x4fc8, 0x87f1, 0xccd7, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v4, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v12, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xe6f4ff60, 0x92cacfc8, 0x30f2ea92, 0x6ff2f953, 0x815c1c28, + 0x466feefe, 0xdb2cdc06, 
0xdf6d912b, 0x214746ac, 0x0a7ec6a5, + 0x35887ce9, 0x91713c70, 0x76adea2b, 0xb3a6341a, 0x6e2977fe, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xb8d79a755b98580a, 0x68b16397da83d642, 0x670688aed7c97cdd, + 0x084dc189ec3eea39, 0x58993c2ae4a62e89, 0xc713321c491334ae, + 0x7882d6539128d119, 0x3d84da5e7dab4cd1, 0x6da189493780c328, + 0x91e8df24708208d6, 0xd5ac914ccbf735f0, 0x2f1a889653a2c559, + 0x74d814e6ebcebe81, 0xba1bbac1969a4055, 0xea0cb63d1bf7d5dc, + 0x21c02801006747a0); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v3, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + asm volatile("vnmsub.vx v1, %[A], v3" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x24, 0x10, 0xdd, 0xab, 
0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, + 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v4, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + asm volatile("vnmsub.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + asm volatile("vnmsub.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + 0x2f0a66ed, 0x59240bcf, 0x915f2166, 0x8c4ace02, 0x802d8981, + 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v16, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 
0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + asm volatile("vnmsub.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, 0x983ca4c97f204385, + 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, + 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, + 0xa1c7ff743113d8bf, 0xe8198a4799a97a9a, 0x5ccf06fd8751eb9d, + 0xa36557d05e8802dc, 0x10aae67f31dc2b4f, 0xe878939fd1287553, + 0x594538a8571dbf06); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v3, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v1, %[A], v3, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x5e, 0x10, 0xa9, 0xab, 0x14, 0xf3, 0x84, 0x13, 0xd7, 0xcf, + 0x5c, 0x4f, 0xa2, 0xda, 0x3d, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v4, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xfe80, 0xd169, 0x5313, 0xfb44, 0xecfc, 0x3f75, 0xcc0d, + 0x86fd, 0xf384, 0xd0d7, 0x9cd1, 0x5a64, 0xa41b, 0x60c3, 0x5886, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 
0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0x48da7aac, 0x9dfe886c, 0xf49f26e5, 0xddd6bece, 0x40ca82f5, + 0x2f0a66ed, 0x38c88af2, 0x915f2166, 0x6f61c0a9, 0x802d8981, + 0x31aba619, 0xa3f70986, 0xbc63c280, 0xa4fbf240, 0x9451b955, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v16, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x93adc14539897782, 0x79abe3a30a816ca0, 0x55e01165195d2d9b, + 0x59a9303f04932768, 0x33a71ede19aec0aa, 0x59a15b1bb66f16c2, + 0x1c27bde3f488bfc6, 0x65e0c3ab56fa1f0c, 0xd53289cca28a3b6b, + 0xa1c7ff743113d8bf, 0x5e3f4c08c869abf4, 0x5ccf06fd8751eb9d, + 0x4b42a451b264b153, 0x10aae67f31dc2b4f, 0x7c0f358ac41d49fa, + 0x594538a8571dbf06); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c new file mode 100644 index 000000000..63627ba9b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c @@ -0,0 +1,242 @@ +// Copyright 
2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v4, v2, v3"); + VCMP_U8(1, v4, 0x00, 0x80, 0xC0, 0xE0, 0xFE, 0xFF, 0xFF, 0x00, 0x00, 0x80, + 0xC0, 0xE0, 0xFE, 0xFF, 0xFF, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v12, v4, v8"); + VCMP_U16(2, v12, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0xFFFF, + 0x0000, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0xFFFF, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v24, v8, v16"); + VCMP_U32(3, v24, 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xFE000000, + 0xFFFE0000, 0xFFFFFFFE, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0xC0000000, 0xE0000000, 0xFE000000, 0xFFFE0000, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 
0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vnsra.wv v6, v2, v4, v0.t"); + VCMP_U8(4, v6, 0x00, 0x80, 0x00, 0xE0, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x80, + 0x00, 0xE0, 0x00, 0xFF, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vnsra.wv v12, v4, v8, v0.t"); + VCMP_U16(5, v12, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vnsra.wv v24, v8, v16, v0.t"); + VCMP_U32(6, v24, 0x00000000, 0x80000000, 0x00000000, 0xE0000000, 0x00000000, + 0xFFFE0000, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0x00000000, 0xE0000000, 0x00000000, 0xFFFE0000, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsra.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 
0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsra.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsra.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsra.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(10, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsra.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(11, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 
0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsra.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(12, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsra.wi v4, v2, 2"); + VCMP_U8(13, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsra.wi v8, v4, 2"); + VCMP_U16(14, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsra.wi v16, v8, 2"); + VCMP_U32(15, v16, 0x00000001, 0x00000002, 0x00000003, 
0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsra.wi v4, v2, 2, v0.t"); + VCMP_U8(16, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsra.wi v8, v4, 2, v0.t"); + VCMP_U16(17, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsra.wi v16, v8, 2, v0.t"); + VCMP_U32(18, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c 
new file mode 100644 index 000000000..a0f4b77bb --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c @@ -0,0 +1,242 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v4, v2, v3"); + VCMP_U8(1, v4, 0x00, 0x80, 0xC0, 0xE0, 0xFE, 0x01, 0x01, 0x00, 0x00, 0x80, + 0xC0, 0xE0, 0xFE, 0x01, 0x01, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v12, v4, v8"); + VCMP_U16(2, v12, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0x0001, + 0x0000, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0x0001, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v24, v8, v16"); + VCMP_U32(3, v24, 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xFE000000, + 0xFFFE0000, 0xFFFFFFFE, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0xC0000000, 0xE0000000, 0xFE000000, 0xFFFE0000, 0xFFFFFFFE, + 
0xFFFFFFFF); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsrl.wv v8, v2, v4, v0.t"); + VCMP_U8(4, v8, 0x00, 0x80, 0x00, 0xE0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, + 0x00, 0xE0, 0x00, 0x01, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vnsrl.wv v12, v4, v8, v0.t"); + VCMP_U16(5, v12, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vnsrl.wv v24, v8, v16, v0.t"); + VCMP_U32(6, v24, 0x00000000, 0x80000000, 0x00000000, 0xE0000000, 0x00000000, + 0xFFFE0000, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0x00000000, 0xE0000000, 0x00000000, 0xFFFE0000, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 
0xFFFC); + asm volatile("vnsrl.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsrl.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsrl.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsrl.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(10, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + 
VCLEAR(v8); + asm volatile("vnsrl.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(11, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsrl.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(12, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsrl.wi v4, v2, 2"); + VCMP_U8(13, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsrl.wi v8, v4, 2"); + VCMP_U16(14, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 
0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsrl.wi v16, v8, 2"); + VCMP_U32(15, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsrl.wi v4, v2, 2, v0.t"); + VCMP_U8(16, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsrl.wi v8, v4, 2, v0.t"); + VCMP_U16(17, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsrl.wi v16, v8, 2, v0.t"); + VCMP_U32(18, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + 
TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c new file mode 100644 index 000000000..dcd162e63 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c @@ -0,0 +1,309 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + asm volatile("vor.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, + 0x03, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + asm volatile("vor.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0, 0xffff, + 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + asm volatile("vor.vv v4, v8, v12"); + VCMP_U32(3, v4, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, + 0xf0f0f0f0, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, + 0x00000003, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 
0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE2() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0, 0xffff, + 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 
0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, + 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, + 0xdeadbeef, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE3() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vor.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, + 0xf1, 0xf0); + + 
VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0, 0xffff, + 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vor.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, + 0xfff0fff0, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, + 0x0ff00ff1, 0xfff0fff0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0); +} + +void TEST_CASE4() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 
0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0, 0xffff, + 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, + 0xfff0fff0, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, + 0xdeadbeef, 0xfff0fff0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0); +} + +void TEST_CASE5() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vor.vi v1, v2, 15"); + VCMP_U8(17, v1, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, 0x0f, 
0xff, 0xff, + 0x0f, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vor.vi v2, v4, 15"); + VCMP_U16(18, v2, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff, 0xffff, + 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vor.vi v4, v8, 15"); + VCMP_U32(19, v4, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, + 0xf0f0f0ff, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, + 0x0000000f, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff); +} + +void TEST_CASE6() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vi v1, v2, 15, v0.t"); + VCMP_U8(21, v1, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, + 0xef, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vi v2, v4, 
15, v0.t"); + VCMP_U16(22, v2, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff, 0xffff, + 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vi v4, v8, 15, v0.t"); + VCMP_U32(23, v4, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, + 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, + 0xdeadbeef, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c new file mode 100644 index 000000000..14fa78e6b --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c
@@ -0,0 +1,36 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+// Basile Bougenot
+
+#include "vector_macros.h"
+
+// Masked population count: vpopc.m counts the mask bits of v2 that are set
+// in positions where v0 is also set, over the first vl = 4 positions.
+void TEST_CASE1() {
+  VSET(4, e32, m1);
+  VLOAD_U32(v2, 7, 0, 0, 0);
+  VLOAD_U32(v0, 5, 0, 0, 0);
+  volatile uint32_t scalar = 1337;
+  volatile uint32_t OUP[] = {0, 0, 0, 0};
+  // vpopc.m writes %[A], so it must be an output operand; early-clobber
+  // because %1 is read afterwards. The store through OUP needs "memory".
+  __asm__ volatile(
+      "vpopc.m %[A], v2, v0.t \n"
+      "sw %[A], (%1) \n"
+      : [A] "=&r"(scalar)
+      : "r"(OUP)
+      : "memory");
+  // Expect 2: 7 & 5 = 0b101 (assuming VLOAD_U32 stores the words as given).
+  XCMP(1, OUP[0], 2);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+  TEST_CASE1();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c
new file mode 100644
index 000000000..2f01159dc
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c
@@ -0,0 +1,93 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked vredand.vs at every SEW (8/16/32/64) with vl = 12
+void TEST_CASE1(void) {
+  VSET(12, e8, m1);
+  VLOAD_8(v1, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1,
+          0xf0);
+  VLOAD_8(v2, 0xf0);
+  asm volatile("vredand.vs v3, v1, v2");
+  VCMP_U8(1, v3, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v2, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x0701,
+           0xfff0, 0xffff, 0x0101, 0xf1f0);
+  VLOAD_16(v4, 0xefff);
+  asm volatile("vredand.vs v6, v2, v4");
+  VCMP_U16(2, v6, 0x0100);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v4, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001,
+           0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff,
+           0x100ff001, 0xf0f0f0f0);
+  VLOAD_32(v8, 0x00f010f0);
+  asm volatile("vredand.vs v12, v4, v8");
+  VCMP_U32(3, v12, 0x00001000);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v8, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_64(v16, 0xfffffffffffffff7);
+  asm volatile("vredand.vs v24, v8, v16");
+  VCMP_U64(4, v24, 0x1000000000000000);
+}
+
+// Masked vredand.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(12, e8, m1);
+  VLOAD_8(v0, 0xf7, 0xff);
+  VLOAD_8(v1, 0xff, 0xf1, 0xff, 0x00, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1,
+          0xf0);
+  VLOAD_8(v2, 0xf0);
+  VLOAD_8(v3, 1);
+  asm volatile("vredand.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 0xf0);
+
+  VSET(12, e16, m2);
+  VLOAD_8(v0, 0x00, 0x08);
+  VLOAD_16(v2, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x9701,
+           0xfff0, 0xffff, 0x0101, 0xf1f0);
+  VLOAD_16(v4, 0xefff);
+  VLOAD_16(v6, 1);
+  asm volatile("vredand.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 0xe1f0);
+
+  VSET(12, e32, m4);
+  VLOAD_8(v0, 0xfe, 0xff);
+  VLOAD_32(v4, 0x00000000, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001,
+           0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff,
+           0x100ff001, 0xf0f0f0f0);
+  VLOAD_32(v8, 0x00f010f0);
+  VLOAD_32(v12, 1);
+  asm volatile("vredand.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 0x00001000);
+
+  VSET(12, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0,
+           0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0);
+  VLOAD_64(v16, 0xfffffffffffffff7);
+  VLOAD_64(v24, 1);
+  asm volatile("vredand.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 0x1000000000000000);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c
new file mode 100644
index 000000000..b00d0885c
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c
@@ -0,0 +1,79 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked signed-max reduction at every SEW (8/16/32/64) with vl = 16
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, -7, 8, 1, 9, 3, 4, 5, -6, 7, 8);
+  VLOAD_8(v2, -1);
+  asm volatile("vredmax.vs v3, v1, v2");
+  VCMP_U8(1, v3, 9);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, -1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, -4, 5, 6, 7, 8);
+  VLOAD_16(v4, 9);
+  asm volatile("vredmax.vs v6, v2, v4");
+  VCMP_U16(2, v6, 9);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 9, 2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, -5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  asm volatile("vredmax.vs v12, v4, v8");
+  VCMP_U32(3, v12, 9);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, -1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, -8);
+  VLOAD_64(v16, -1);
+  asm volatile("vredmax.vs v24, v8, v16");
+  VCMP_U64(4, v24, 9);
+}
+
+// Masked vredmax.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v0, 0x03, 0x00);
+  VLOAD_8(v1, -1, 2, 3, -4, 5, 6, 7, 9, 1, -2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  VLOAD_8(v3, 1);
+  asm volatile("vredmax.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 2);
+
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 9);
+  VLOAD_16(v6, 1);
+  asm volatile("vredmax.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 9);
+
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_32(v4, -1, 2, 3, 4, 5, 6, 7, -8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  VLOAD_32(v12, 1);
+  asm volatile("vredmax.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 8);
+
+  VSET(16, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 1, -2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 4);
+  VLOAD_64(v24, 1);
+  asm volatile("vredmax.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 8);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c
new file mode 100644
index 000000000..9e7deed64
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c
@@ -0,0 +1,106 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked unsigned-max reduction at every SEW (8/16/32/64) with vl = 16
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  asm volatile("vredmaxu.vs v3, v1, v2");
+  VCMP_U8(1, v3, 9);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 9);
+  asm volatile("vredmaxu.vs v6, v2, v4");
+  VCMP_U16(2, v6, 9);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  asm volatile("vredmaxu.vs v12, v4, v8");
+  VCMP_U32(3, v12, 9);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 1);
+  asm volatile("vredmaxu.vs v24, v8, v16");
+  VCMP_U64(4, v24, 9);
+}
+// Masked vredmaxu.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v0, 0x03, 0x00);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  VLOAD_8(v3, 1);
+  asm volatile("vredmaxu.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 2);
+
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 9);
+  VLOAD_16(v6, 1);
+  asm volatile("vredmaxu.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 9);
+
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  VLOAD_32(v12, 1);
+  asm volatile("vredmaxu.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 8);
+
+  VSET(16, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 4);
+  VLOAD_64(v24, 1);
+  asm volatile("vredmaxu.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 8);
+}
+
+// Unmasked again: negative inputs must win as large unsigned values
+void TEST_CASE3(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, -3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  asm volatile("vredmaxu.vs v3, v1, v2");
+  VCMP_U8(9, v3, -3);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, -9);
+  asm volatile("vredmaxu.vs v6, v2, v4");
+  VCMP_U16(10, v6, -9);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 9, 2, 3, 4, -5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  asm volatile("vredmaxu.vs v12, v4, v8");
+  VCMP_U32(11, v12, -5);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, -1);
+  asm volatile("vredmaxu.vs v24, v8, v16");
+  VCMP_U64(12, v24, -1);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c
new file mode 100644
index 000000000..c3415b626
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c
@@ -0,0 +1,78 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked signed-min reduction at every SEW (8/16/32/64) with vl = 16
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  asm volatile("vredmin.vs v3, v1, v2");
+  VCMP_U8(1, v3, 0);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 0);
+  asm volatile("vredmin.vs v6, v2, v4");
+  VCMP_U16(2, v6, -3);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, -1);
+  asm volatile("vredmin.vs v12, v4, v8");
+  VCMP_U32(3, v12, -1);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, -1);
+  asm volatile("vredmin.vs v24, v8, v16");
+  VCMP_U64(4, v24, -9);
+}
+// Masked vredmin.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v0, 0x03, 0x00);
+  VLOAD_8(v1, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  VLOAD_8(v3, 1);
+  asm volatile("vredmin.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, -2);
+
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_16(v2, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 3);
+  VLOAD_16(v6, 1);
+  asm volatile("vredmin.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 3);
+
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 8);
+  VLOAD_32(v12, 1);
+  asm volatile("vredmin.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 7);
+
+  VSET(16, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 4);
+  VLOAD_64(v24, 1);
+  asm volatile("vredmin.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 1);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c
new
file mode 100644
index 000000000..5e3dc2254
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c
@@ -0,0 +1,78 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked unsigned-min reduction at every SEW (8/16/32/64) with vl = 16
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  asm volatile("vredminu.vs v3, v1, v2");
+  VCMP_U8(1, v3, 0);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 0);
+  asm volatile("vredminu.vs v6, v2, v4");
+  VCMP_U16(2, v6, 0);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, -1);
+  asm volatile("vredminu.vs v12, v4, v8");
+  VCMP_U32(3, v12, 1);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, -1);
+  asm volatile("vredminu.vs v24, v8, v16");
+  VCMP_U64(4, v24, 2);
+}
+// Masked vredminu.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v0, 0x03, 0x00);
+  VLOAD_8(v1, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  VLOAD_8(v3, 1);
+  asm volatile("vredminu.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 1);
+
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_16(v2, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 3);
+  VLOAD_16(v6, 1);
+  asm volatile("vredminu.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 3);
+
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0x00, 0xc0);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 8);
+  VLOAD_32(v12, 1);
+  asm volatile("vredminu.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 7);
+
+  VSET(16, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 4);
+  VLOAD_64(v24, 1);
+  asm volatile("vredminu.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 1);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c
new file mode 100644
index 000000000..69ea3cb39
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c
@@ -0,0 +1,93 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked vredor.vs at every SEW (8/16/32/64) with vl = 12
+void TEST_CASE1(void) {
+  VSET(12, e8, m1);
+  VLOAD_8(v1, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01,
+          0x00);
+  VLOAD_8(v2, 0x10);
+  asm volatile("vredor.vs v3, v1, v2");
+  VCMP_U8(1, v3, 0x11);
+
+  VSET(12, e16, m2);
+  VLOAD_16(v2, 0x0000, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x0701,
+           0x0000, 0x0000, 0x0101, 0x0100);
+  VLOAD_16(v4, 0xe000);
+  asm volatile("vredor.vs v6, v2, v4");
+  VCMP_U16(2, v6, 0xe701);
+
+  VSET(12, e32, m4);
+  VLOAD_32(v4, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001,
+           0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000,
+           0x10000001, 0x00000000);
+  VLOAD_32(v8, 0x00001000);
+  asm volatile("vredor.vs v12, v4, v8");
+  VCMP_U32(3, v12, 0x10001001);
+
+  VSET(12, e64, m8);
+  VLOAD_64(v8, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000);
+  VLOAD_64(v16, 0x0000000000000007);
+  asm volatile("vredor.vs v24, v8, v16");
+  VCMP_U64(4, v24, 0x1000000000000007);
+}
+
+// Masked vredor.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(12, e8, m1);
+  VLOAD_8(v0, 0x07, 0x00);
+  VLOAD_8(v1, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01,
+          0x00);
+  VLOAD_8(v2, 0x00);
+  VLOAD_8(v3, 1);
+  asm volatile("vredor.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 0x01);
+
+  VSET(12, e16, m2);
+  VLOAD_8(v0, 0x00, 0x08);
+  VLOAD_16(v2, 0x0f00, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x9701,
+           0x0000, 0x0000, 0x0101, 0x0100);
+  VLOAD_16(v4, 0xe000);
+  VLOAD_16(v6, 1);
+  asm volatile("vredor.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 0xe100);
+
+  VSET(12, e32, m4);
+  VLOAD_8(v0, 0x0e, 0x00);
+  VLOAD_32(v4, 0xf0000fff, 0x10000001, 0x00000000, 0x00000000, 0x10000001,
+           0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000,
+           0x10000001, 0x00000000);
+  VLOAD_32(v8, 0x00001000);
+  VLOAD_32(v12, 1);
+  asm volatile("vredor.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 0x10001001);
+
+  VSET(12, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 0x0000000000000000, 0x1000000000000001, 0x0000f00000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000,
+           0x0000000000000000, 0x1000000000000001, 0x0000000000000000);
+  VLOAD_64(v16, 0x0000000000000007);
+  VLOAD_64(v24, 1);
+  asm volatile("vredor.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 0x1000000000000007);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c
new file mode 100644
index 000000000..26284c76a
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c
@@ -0,0 +1,178 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti
+
+#include "vector_macros.h"
+
+// Unmasked vredsum.vs at every SEW (8/16/32/64) with vl = 16
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  asm volatile("vredsum.vs v3, v1, v2");
+  VCMP_U8(1, v3, 73);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 1);
+  asm volatile("vredsum.vs v6, v2, v4");
+  VCMP_U16(2, v6, 73);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  asm volatile("vredsum.vs v12, v4, v8");
+  VCMP_U32(3, v12, 73);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 1);
+  asm volatile("vredsum.vs v24, v8, v16");
+  VCMP_U64(4, v24, 73);
+}
+
+// Masked vredsum.vs (v0.t); the destination is preloaded with 1 each time
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_8(v2, 1);
+  VLOAD_8(v3, 1);
+  asm volatile("vredsum.vs v3, v1, v2, v0.t");
+  VCMP_U8(5, v3, 37);
+
+  VSET(16, e16, m2);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v4, 1);
+  VLOAD_16(v6, 1);
+  asm volatile("vredsum.vs v6, v2, v4, v0.t");
+  VCMP_U16(6, v6, 37);
+
+  VSET(16, e32, m4);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v8, 1);
+  VLOAD_32(v12, 1);
+  asm volatile("vredsum.vs v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 37);
+
+  VSET(16, e64, m8);
+  VLOAD_8(v0, 0xaa, 0x55);
+  VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v16, 1);
+  VLOAD_64(v24, 1);
+  asm volatile("vredsum.vs v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 37);
+}
+
+// Are we respecting the undisturbed tail policy?
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2"); + VCMP_U8(9, v3, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v6, v2, v4"); + VCMP_U16(10, v6, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v12, v4, v8"); + VCMP_U32(11, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(12, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2"); + VCMP_U8(13, v3, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v6, v2, v4"); + VCMP_U16(14, v6, 2, 2, 
3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v12, v4, v8"); + VCMP_U32(15, v12, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(7, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v8, v16"); + VCMP_U64(16, v24, 29, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(15, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v8, v16"); + VCMP_U64(17, v24, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U8(18, v3, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U16(19, v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 
8); + VLOAD_32(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U32(20, v3, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c new file mode 100644 index 000000000..74b128fc1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c @@ -0,0 +1,44 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_8(v1, 0x00, 0x01, 0x01, 0x00); + VLOAD_8(v2, 0x11); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U8(1, v3, 0x11); + + VSET(4, e16, m1); + VLOAD_16(v1, 0x8000, 0x0301, 0x0101, 0x0001); + VLOAD_16(v2, 0xe001); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U16(2, v3, 0x6200); + + VSET(4, e32, m1); + VLOAD_32(v1, 0x00000001, 0x10000001, 0x00000000, 0x00000000); + VLOAD_32(v2, 0x00001000); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U32(3, v3, 0x10001000); + + VSET(4, e64, m2); + VLOAD_64(v2, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000); + VLOAD_64(v4, 0x0000000000000007); + asm volatile("vredxor.vs v6, v2, v4"); + VCMP_U64(4, v6, 0x1000000000000006); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c new file mode 100644 index 000000000..d7bce332d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + 0x0c, 0x8e, 0xae, 0xa1, 0x93); + VLOAD_8(v3, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + 0xdb, 0x37, 0x78, 0xe2, 0x85); + asm volatile("vrem.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x9b, 0x28, 0xec, 0xe9, 0x06, 0xfd, 0xfc, 0x02, 0x04, 0x37, + 0x09, 0x0c, 0xfc, 0xae, 0xfb, 0x93); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); + VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, + 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 0x497b, 0x6aa0, 0x6071, 0xf431); + asm volatile("vrem.vv v2, v4, v6"); + VCMP_I16(2, v2, 0xfac3, 0xf1ad, 0xf7f0, 0xd084, 0xbbc6, 0xf6b4, 0xfb94, + 0xa20c, 0xe04c, 0xd954, 0xf417, 0xfb4c, 0x207a, 0xf415, 0xddd3, + 0xff93); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, + 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, + 0x22d7ca24); + VLOAD_32(v12, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, + 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, + 0x41ec046e); + asm volatile("vrem.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x1bc2e57e, 0x2ffccae4, 0x004b37b9, 0xfb90411a, 0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xff0c3f6f, 0xf3f9c5d2, + 0xb480c184, 0xfe6be56e, 0x00ddb682, 0xe2ee9c3e, 0xe1652989, + 0x22d7ca24); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, + 
0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, + 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, + 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, + 0x0f0389282729456f); + VLOAD_64(v24, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + 0xf7e905f27777369f, 0x73cbef014fd0f311, 0x4c3e4fc36800b443, + 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, + 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, + 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, + 0xff00f7d87b2c7068); + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I64(4, v8, 0xffcde833b0225dfa, 0xead349f71426e37b, 0x559913931b91caf2, + 0xfd2c63d04b9c96ff, 0xd8a48a111d080e66, 0x2f6e7f034da1e9fb, + 0xd36f1a0776de734f, 0x22fe41ba17057c20, 0x05d909a62a6f7216, + 0x36e81d34c3ee3445, 0xf8bdadadbe16fdd4, 0xc8ae527a2cc4d908, + 0xfcb7a9fb9e171616, 0x020b56d66a57ba0c, 0xddcd4b269cedbe27, + 0x00120ed75ec3db87); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + 0x0c, 0x8e, 0xae, 0xa1, 0x93); + VLOAD_8(v3, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + 0xdb, 0x37, 0x78, 0xe2, 0x85); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vrem.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x28, 0, 0xe9, 0, 0xfd, 0, 0x02, 0, 0x37, 0, 0x0c, 0, 0xae, + 0, 0x93); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); + VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, + 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 0x497b, 0x6aa0, 0x6071, 0xf431); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vrem.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0xf1ad, 0, 0xd084, 0, 0xf6b4, 0, 0xa20c, 0, 0xd954, 0, + 0xfb4c, 0, 0xf415, 0, 0xff93); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 
0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, + 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, + 0x22d7ca24); + VLOAD_32(v12, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, + 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, + 0x41ec046e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vrem.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x2ffccae4, 0, 0xfb90411a, 0, 0x10dc3772, 0, 0x027ed4d3, 0, + 0xf3f9c5d2, 0, 0xfe6be56e, 0, 0xe2ee9c3e, 0, 0x22d7ca24); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, + 0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, + 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, + 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, + 0x0f0389282729456f); + VLOAD_64(v24, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + 0xf7e905f27777369f, 0x73cbef014fd0f311, 0x4c3e4fc36800b443, + 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, + 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, + 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, + 0xff00f7d87b2c7068); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xead349f71426e37b, 0, 0xfd2c63d04b9c96ff, 0, + 0x2f6e7f034da1e9fb, 0, 0x22fe41ba17057c20, 0, 0x36e81d34c3ee3445, 0, + 0xc8ae527a2cc4d908, 0, 0x020b56d66a57ba0c, 0, 0x00120ed75ec3db87); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + 0x2a, 0xb2, 0x57, 0xe5, 0x6c); + int64_t scalar = 5; + asm volatile("vrem.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x01, 0x04, 0x00, 0xfe, 0x00, 0x03, 0xff, 0x03, 0xfc, 0x01, + 0x00, 0x02, 0xfd, 0x02, 0xfe, 0x03); + + VSET(16, e16, m2); + 
VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); + scalar = -538; + asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0xff2e, 0xfe9d, 0x0089, 0x00f4, 0xffe8, 0xff5c, 0x01c7, + 0x0218, 0xfe60, 0x00d4, 0x003d, 0x00de, 0xfe7e, 0x00ae, 0xfee7, + 0xfec2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, + 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, + 0xb6cc2d44); + scalar = 649; + asm volatile("vrem.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xfffffee4, 0x00000116, 0x00000160, 0xffffffef, 0x00000217, + 0x00000275, 0xfffffea6, 0x000000a9, 0x000000e4, 0xfffffe09, + 0x00000272, 0x0000023c, 0xffffff79, 0x000000ce, 0xffffffb3, + 0xfffffe0e); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, + 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, + 0x38819f95e162663e, 0x698d19ce0e74ff8d, 0xb525257a9b5cd972, + 0xb308a4fe0dcbb2f3, 0xf2fa735abc2db4d0, 0xc73c476461ac3f28, + 0xb2830c2607bfffcc); + scalar = -59223; + asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0xffffffffffff299e, 0xffffffffffff1f8a, 0xffffffffffff57aa, + 0x000000000000cc8c, 0x000000000000416e, 0xffffffffffffcecd, + 0xffffffffffff7e24, 0x000000000000397b, 0x000000000000bb50, + 0x0000000000006b00, 0x0000000000004f3f, 0xffffffffffff9a21, + 0xffffffffffffae24, 0xffffffffffffca84, 0xffffffffffffa7fb, + 0xffffffffffff84dd); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + 0x2a, 0xb2, 0x57, 0xe5, 0x6c); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vrem.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + 
VCMP_I8(13, v1, 0, 0x04, 0, 0xfe, 0, 0x03, 0, 0x03, 0, 0x01, 0, 0x02, 0, 0x02, + 0, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); + scalar = -538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xfe9d, 0, 0x00f4, 0, 0xff5c, 0, 0x0218, 0, 0x00d4, 0, + 0x00de, 0, 0x00ae, 0, 0xfec2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, + 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, + 0xb6cc2d44); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vrem.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00000116, 0, 0xffffffef, 0, 0x00000275, 0, 0x000000a9, + 0, 0xfffffe09, 0, 0x0000023c, 0, 0x000000ce, 0, 0xfffffe0e); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, + 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, + 0x38819f95e162663e, 0x698d19ce0e74ff8d, 0xb525257a9b5cd972, + 0xb308a4fe0dcbb2f3, 0xf2fa735abc2db4d0, 0xc73c476461ac3f28, + 0xb2830c2607bfffcc); + scalar = -59223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0xffffffffffff1f8a, 0, 0x000000000000cc8c, 0, + 0xffffffffffffcecd, 0, 0x000000000000397b, 0, 0x0000000000006b00, 0, + 0xffffffffffff9a21, 0, 0xffffffffffffca84, 0, 0xffffffffffff84dd); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c new file mode 100644 index 000000000..a34fe33f8 
--- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + 0x5c, 0x75, 0xdd, 0x0c, 0x42); + VLOAD_8(v3, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + 0xfd, 0x94, 0xe0, 0xb7, 0xe6); + asm volatile("vremu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x11, 0x20, 0x6a, 0x37, 0x14, 0x26, 0x05, 0x25, 0x3f, 0x01, + 0x34, 0x5c, 0x75, 0xdd, 0x0c, 0x42); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); + VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 0x407f); + asm volatile("vremu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x3f86, 0x0338, 0x5ae4, 0x1bef, 0x53ed, 0x2105, 0x0713, + 0x13d5, 0x421e, 0x023d, 0x4772, 0x0272, 0xb66f, 0x286d, 0x1d15, + 0x0480); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, + 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, + 0x653a05ee); + VLOAD_32(v12, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, + 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, + 0x5a0a7906); + asm volatile("vremu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x647d8841, 0x40e7af5f, 0x1cf0f294, 0x199e7b44, 0x0e596411, + 0x1bdda0bb, 0x388de7e8, 0x0b5005a9, 0x5e1c2ac4, 0x0059ebf3, + 0x39dc32f4, 0x1dc82ea3, 0x3efceda1, 0x2be20975, 0x02e9dd65, + 0x0b2f8ce8); + + VSET(16, e64, m8); 
+ VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, + 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, + 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, + 0xeaa4ce2cf507b527); + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, + 0xe8b7e325c9ff594b, 0x05169e56693600d7, 0x08e72c4bb62ad267, + 0xbd9677ee996d5fa5, 0x900295e8502a9817, 0x39e0bfa9927679a8, + 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, + 0x8e4a3b2358129e92); + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x09ab27501ccac4a6, 0x0f33ec995811c4cc, 0x0269786490d67dc2, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0x278d8fcc2e921f7e, + 0x118c4335397980bf, 0x028751b02d34f353, 0x06ad6a9787463728, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x2707da74c7cd604d, + 0x93a87cf6308ad888, 0x5aa9fe301ce7bdf7, 0x12c3eb59b289fbae, + 0x5c5a93099cf51695); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + 0x5c, 0x75, 0xdd, 0x0c, 0x42); + VLOAD_8(v3, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + 0xfd, 0x94, 0xe0, 0xb7, 0xe6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vremu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x20, 0, 0x37, 0, 0x26, 0, 0x25, 0, 0x01, 0, 0x5c, 0, 0xdd, + 0, 0x42); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); + VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 0x407f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vremu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x0338, 0, 0x1bef, 0, 0x2105, 0, 
0x13d5, 0, 0x023d, 0, + 0x0272, 0, 0x286d, 0, 0x0480); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, + 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, + 0x653a05ee); + VLOAD_32(v12, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, + 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, + 0x5a0a7906); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vremu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x40e7af5f, 0, 0x199e7b44, 0, 0x1bdda0bb, 0, 0x0b5005a9, 0, + 0x0059ebf3, 0, 0x1dc82ea3, 0, 0x2be20975, 0, 0x0b2f8ce8); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, + 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, + 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, + 0xeaa4ce2cf507b527); + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, + 0xe8b7e325c9ff594b, 0x05169e56693600d7, 0x08e72c4bb62ad267, + 0xbd9677ee996d5fa5, 0x900295e8502a9817, 0x39e0bfa9927679a8, + 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, + 0x8e4a3b2358129e92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0f33ec995811c4cc, 0, 0x9a251c274a394df3, 0, + 0x278d8fcc2e921f7e, 0, 0x028751b02d34f353, 0, 0x85a1f65867438a09, 0, + 0x2707da74c7cd604d, 0, 0x5aa9fe301ce7bdf7, 0, 0x5c5a93099cf51695); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + 0x24, 0xc5, 0x2b, 0x37, 0xbe); + uint64_t scalar = 5; + asm volatile("vremu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + 
VCMP_I8(9, v1, 0x02, 0x01, 0x00, 0x02, 0x04, 0x02, 0x04, 0x03, 0x01, 0x00, + 0x02, 0x01, 0x02, 0x03, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); + scalar = 538; + asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x018b, 0x01f7, 0x01ca, 0x00ce, 0x0202, 0x00c8, 0x01d7, + 0x011c, 0x01f0, 0x0163, 0x01bd, 0x0174, 0x0051, 0x01ae, 0x017d, + 0x0111); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, + 0x800a5d88, 0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, + 0x121a9b8b); + scalar = 649; + asm volatile("vremu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x00000039, 0x00000141, 0x0000020b, 0x0000015f, 0x0000008a, + 0x00000199, 0x00000214, 0x0000006c, 0x0000025d, 0x000001a6, + 0x000000d2, 0x00000168, 0x000001e6, 0x00000266, 0x00000188, + 0x00000159); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, + 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, + 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, + 0xafd2b5edb83df693, 0xddd4766a628d4c30, 0xa1f4d0f48a6ac917, + 0x827a07db9e6a8897); + scalar = 9223; + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000000000000167d, 0x00000000000015f2, 0x00000000000019be, + 0x00000000000003fd, 0x00000000000010ce, 0x0000000000001863, + 0x0000000000000750, 0x0000000000000062, 0x0000000000002237, + 0x00000000000002bc, 0x0000000000000061, 0x0000000000001b82, + 0x0000000000001109, 0x0000000000000fb7, 0x00000000000011e8, + 0x0000000000000545); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + 0x24, 0xc5, 0x2b, 0x37, 
0xbe); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vremu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x01, 0, 0x02, 0, 0x02, 0, 0x03, 0, 0x00, 0, 0x01, 0, 0x03, + 0, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); + scalar = 538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0x01f7, 0, 0x00ce, 0, 0x00c8, 0, 0x011c, 0, 0x0163, 0, + 0x0174, 0, 0x01ae, 0, 0x0111); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, + 0x800a5d88, 0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, + 0x121a9b8b); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vremu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00000141, 0, 0x0000015f, 0, 0x00000199, 0, 0x0000006c, + 0, 0x000001a6, 0, 0x00000168, 0, 0x00000266, 0, 0x00000159); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, + 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, + 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, + 0xafd2b5edb83df693, 0xddd4766a628d4c30, 0xa1f4d0f48a6ac917, + 0x827a07db9e6a8897); + scalar = 9223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x00000000000015f2, 0, 0x00000000000003fd, 0, + 0x0000000000001863, 0, 0x0000000000000062, 0, 0x00000000000002bc, 0, + 0x0000000000001b82, 0, 0x0000000000000fb7, 0, 0x0000000000000545); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c new file mode 100644 index 000000000..7df4dc498 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c @@ -0,0 +1,72 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_8(v6, 1, 0, 4, 3, 2); + __asm__ volatile("vrgather.vv v2, v4, v6"); + VEC_CMP_8(1, v2, 20, 10, 50, 40, 30); +} + +void TEST_CASE2() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_8(v6, 1, 0, 4, 3, 2); + VLOAD_U8(v0, 26, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vv v2, v4, v6, v0.t"); + VEC_CMP_8(2, v2, 0, 10, 0, 40, 30); +} + +void TEST_CASE3() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + uint64_t scalar = 3; + __asm__ volatile("vrgather.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VEC_CMP_8(3, v2, 40, 40, 40, 40, 40); +} + +void TEST_CASE4() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + uint64_t scalar = 3; + VLOAD_U8(v0, 7, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_8(4, v2, 40, 40, 40, 0, 0); +} + +void TEST_CASE5() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + __asm__ volatile("vrgather.vi v2, v4, 3"); + VEC_CMP_8(5, v2, 40, 40, 40, 40, 40); +} + +void TEST_CASE6() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_U8(v0, 7, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vi v2, v4, 3, v0.t"); + VEC_CMP_8(6, v2, 40, 40, 40, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c new file mode 100644 index 000000000..2cab8099b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c @@ -0,0 +1,136 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v3, v1, 10"); + VCMP_U8(1, v3, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v4, v2, 10"); + VCMP_U16(2, v4, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v8, v4, 10"); + VCMP_U32(3, v8, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v16, v8, 10"); + VCMP_U64(4, v16, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v3); + asm volatile("vrsub.vi v3, v1, 10, v0.t"); + VCMP_U8(5, v3, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v4); + asm volatile("vrsub.vi v4, v2, 10, v0.t"); + VCMP_U16(6, v4, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, 
e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v8); + asm volatile("vrsub.vi v8, v4, 10, v0.t"); + VCMP_U32(7, v8, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v16); + asm volatile("vrsub.vi v16, v8, 10, v0.t"); + VCMP_U64(8, v16, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); +} + +void TEST_CASE3(void) { + const uint64_t scalar = 25; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v3, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); +} + +void TEST_CASE4(void) { + const uint64_t scalar = 25; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v3); + asm volatile("vrsub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v3, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + 
VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v4); + asm volatile("vrsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v8); + asm volatile("vrsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v16); + asm volatile("vrsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c new file mode 100644 index 000000000..d75926e2b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c @@ -0,0 +1,93 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 // NOTE(review): passed to aligned(), which takes bytes, not bits - confirm + +static volatile uint8_t ALIGNED_O8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +static volatile uint16_t ALIGNED_O16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}; + +static volatile uint32_t ALIGNED_O32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000}; + +static volatile uint64_t ALIGNED_O64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000}; + +// Misaligned access wrt 128-bit: 15 elements are stored starting at element 1 of the aligned buffer, so element 0 must stay zero +void TEST_CASE1(void) { + VSET(15, e8, m1); + VLOAD_8(v1, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, + 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vse8.v v1, (%0)" ::"r"(&ALIGNED_O8[1])); + VVCMP_U8(1, ALIGNED_O8, 0x00, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, 0x9f, + 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20); +} + +void TEST_CASE2(void) { + VSET(15, e16, m2); + VLOAD_16(v2, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220, 0x9f11, + 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220); + asm volatile("vse16.v v2, (%0)" ::"r"(&ALIGNED_O16[1])); + VVCMP_U16(2, ALIGNED_O16, 0x0000, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, + 0x1546, 0x3220, 0x9f11, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, + 
0x1546, 0x3220); +} + +void TEST_CASE3(void) { + VSET(15, e32, m4); + VLOAD_32(v4, 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, + 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, 0x13241139, + 0x20862497, 0x9f872456, 0xe1356784, 0x13241139, 0x20862497); + asm volatile("vse32.v v4, (%0)" ::"r"(&ALIGNED_O32[1])); + VVCMP_U32(3, ALIGNED_O32, 0x00000000, 0xe1356784, 0x13241139, 0x20862497, + 0x9f872456, 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, + 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, + 0x13241139, 0x20862497); +} + +void TEST_CASE4(void) { + VSET(15, e64, m8); + VLOAD_64(v8, 0xe135578794246784, 0x1315345345241139, 0x2086252110062497, + 0x1100229933847136, 0xaaffaaffaaffaaff, 0xaf87245315434136, + 0xa135578794246784, 0x2315345345241139, 0x1086252110062497, + 0x1100229933847134, 0xaaffaaffaaffaaf4, 0x9315345345241139, + 0x9086252110062497, 0x9100229933847134, 0x9affaaffaaffaaf4); + asm volatile("vse64.v v8, (%0)" ::"r"(&ALIGNED_O64[1])); + VVCMP_U64(4, ALIGNED_O64, 0x0000000000000000, 0xe135578794246784, + 0x1315345345241139, 0x2086252110062497, 0x1100229933847136, + 0xaaffaaffaaffaaff, 0xaf87245315434136, 0xa135578794246784, + 0x2315345345241139, 0x1086252110062497, 0x1100229933847134, + 0xaaffaaffaaffaaf4, 0x9315345345241139, 0x9086252110062497, + 0x9100229933847134, 0x9affaaffaaffaaf4); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c new file mode 100644 index 000000000..1374f02f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c @@ -0,0 +1,162 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vl1r + +#include "vector_macros.h" + +uint64_t counter; + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint8_t zero_vec_8b[VLEN]; +uint8_t buf_vec_8b[VLEN]; + +////////// +// vs1r // +////////// + +// 1 whole register store +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Store the whole register and check it against the golden vector + asm volatile("vs1r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs1r.v v17, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs2r // +////////// + +// 2 whole registers store +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Store both registers and check all VLEN / 4 stored bytes against the golden vector + asm volatile("vs2r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, gold_vec_8b, VLEN / 
4); + // Check that the neighbour registers are okay (only the first VLEN / 8 bytes are compared) + asm volatile("vs2r.v v18, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs4r // +////////// + +// 4 whole registers store +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Store the four registers and check all VLEN / 2 stored bytes against the golden vector + asm volatile("vs4r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, gold_vec_8b, VLEN / 2); + // Check that the neighbour registers are okay (only the first VLEN / 8 bytes are compared) + asm volatile("vs4r.v v20, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs8r // +////////// + +// 8 whole registers store +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Store the eight registers and check all VLEN stored bytes against the golden vector + asm volatile("vs8r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, gold_vec_8b, VLEN); + // Check that the neighbour registers are okay (only the first VLEN / 8 bytes are compared) + asm volatile("vs8r.v v24, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +//////////// +// 
Others // +//////////// + +// Check with initial vl == 0 +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs1r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs1r.v v17, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c new file mode 100644 index 000000000..cd224c730 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c @@ -0,0 +1,99 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, -80, 2, 100, 4); + VLOAD_8(v2, -90, 2, 50, 4); + __asm__ volatile("vsadd.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 0x80, 4, 127, 8); + read_vxsat(vxsat); + check_vxsat(1, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE2(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, -80, 2, 100, 4); + VLOAD_8(v2, -90, 2, 50, 4); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 8); + read_vxsat(vxsat); + check_vxsat(2, vxsat, 0); + reset_vxsat; +} + +void TEST_CASE3(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x7FFFFFFB, 3, 4); + __asm__ volatile("vsadd.vi v3, v1, 5" ::); + VCMP_U32(3, v3, 6, 0x7FFFFFFF, 8, 9); + read_vxsat(vxsat); + check_vxsat(3, vxsat, 1); + reset_vxsat; +} + +// Don't use VCLEAR here: it results in a glitch where values are off by 1. NOTE(review): VCLEAR(v3) is still called below - confirm which is intended +void TEST_CASE4(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 0xFFFFFFFD, 0x7FFFFFFC); + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vi v3, v1, 5, v0.t" ::); + VCMP_U32(4, v3, 0, 7, 0, 0x7FFFFFFF); + read_vxsat(vxsat); + check_vxsat(4, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE5(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 0x7FFFFFFD, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vsadd.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(5, v3, 0x7FFFFFFF, 7, 8, 9); + read_vxsat(vxsat); + check_vxsat(5, vxsat, 1); + reset_vxsat; +} + +// Don't use VCLEAR here: it results in a glitch where values are off by 1. NOTE(review): VCLEAR(v3) is still called below - confirm which is intended +void TEST_CASE6(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x7ffffffC, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vx v3, v1, %[A], v0.t" ::[A] 
"r"(scalar)); + VCMP_U32(6, v3, 0, 0x7FFFFFFF, 0, 9); + read_vxsat(vxsat); + check_vxsat(6, vxsat, 1); + reset_vxsat; +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c new file mode 100644 index 000000000..f2fd3d03d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c @@ -0,0 +1,113 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, 133, 2, 220, 4); + VLOAD_8(v2, 133, 2, 50, 4); + __asm__ volatile("vsaddu.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 255, 4, 255, 8); + read_vxsat(vxsat); + check_vxsat(1, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE2(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, 1, 2, 3, 154); + VLOAD_8(v2, 1, 2, 3, 124); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 255); + read_vxsat(vxsat); + check_vxsat(2, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE3(void) { + uint64_t vxsat; + VSET(4, e32, m2); + VLOAD_32(v2, 1, 0xFFFFFFFB, 3, 4); + __asm__ volatile("vsaddu.vi v6, v2, 5" ::); + VCMP_U32(3, v6, 6, 0xFFFFFFFF, 8, 9); + read_vxsat(vxsat); + check_vxsat(3, vxsat, 1); + reset_vxsat; +} + +// Don't use VCLEAR here: it results in a glitch where values are off by 1. NOTE(review): VCLEAR(v3) is still called below - confirm which is intended +void TEST_CASE4(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 0xFFFFFFFD, 0xFFFFFFFC); + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vi v3, v1, 5, v0.t" ::); + VCMP_U32(4, v3, 0, 7, 0, 0xFFFFFFFF); + 
read_vxsat(vxsat); + check_vxsat(4, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE5(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 0xFFFFFFFD, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vsaddu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(5, v3, 0xFFFFFFFF, 7, 8, 9); + read_vxsat(vxsat); + check_vxsat(5, vxsat, 1); + reset_vxsat; +} + +// Don't use VCLEAR here: it results in a glitch where values are off by 1. NOTE(review): VCLEAR(v3) is still called below - confirm which is intended +void TEST_CASE6(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0xfffffffC, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(6, v3, 0, 0xFFFFFFFF, 0, 9); + read_vxsat(vxsat); + check_vxsat(6, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE7(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x0000FFFF, 3, 4); + VLOAD_32(v2, 0xA, 0xFFFF0000, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vv v3, v1, v2" ::); + VCMP_U32(7, v3, 0xB, 0xFFFFFFFF, 3, 4); + read_vxsat(vxsat); + check_vxsat(7, vxsat, 0); + reset_vxsat; +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c new file mode 100644 index 000000000..09d61e8b7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c @@ -0,0 +1,76 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { // vsbc.vvm: mask 0xAA supplies a borrow-in of 1 to the odd-indexed elements + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v3, v1, v2, v0"); + VCMP_U8(1, v3, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v6, v2, v4, v0"); + VCMP_U16(2, v6, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v12, v4, v8, v0"); + VCMP_U32(3, v12, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v24, v8, v16, v0"); + VCMP_U64(4, v24, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); +} + +void TEST_CASE2(void) { // vsbc.vxm: same borrow mask, the scalar is subtracted from every element + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VCMP_U8(5, v3, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v4, v2, %[A], v0" ::[A] "r"(scalar)); + VCMP_U16(6, v4, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 
0xAA, 0xAA); + asm volatile("vsbc.vxm v8, v4, %[A], v0" ::[A] "r"(scalar)); + VCMP_U32(7, v8, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v16, v8, %[A], v0" ::[A] "r"(scalar)); + VCMP_U64(8, v16, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c new file mode 100644 index 000000000..70f5ca3a0 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c @@ -0,0 +1,57 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vle1 + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8_GOLD[16] + __attribute__((aligned(AXI_DWIDTH))) = {0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, + 0x89, 0x88, 0x88, 0xae, 0x08, 0x91, + 0x02, 0x59, 0x11, 0x89}; + +static volatile uint8_t ALIGNED_I8_BUF[16] + __attribute__((aligned(AXI_DWIDTH))) = {0x00, 0x00, 0x00, 0x0, 0x00, 0x00, + 0x00, 0x0, 0x00, 0x00, 0x00, 0x0, + 0x00, 0x00, 0x00, 0x0}; + +void TEST_CASE1(void) { // NOTE(review): ALIGNED_I8_BUF is never cleared between the sub-cases, so checks 2-5 would also pass if vse1.v stored nothing + VSET(16, e8, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 1, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e8, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 2, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" 
::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 3, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m8); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 4, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v1, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 5, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c new file mode 100644 index 000000000..c5d9b06a7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c @@ -0,0 +1,357 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 so that mret resumes after the trapping instruction + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 so that mret resumes after the trapping instruction + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); // NOTE(review): sp is reloaded here, yet every later load still indexes off sp - confirm + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); // NOTE(review): duplicate of the previous load + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + 
asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause into t3 (this overrides the t3 value loaded above) + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec16(volatile uint16_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint16_t ALIGNED_I16[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse16 ********// +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v0, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v0, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(1, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +//******Checking functionality of vse16 with an illegal register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v1, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(16, e16, m2); + asm volatile("vse16.v 
v1, (%0)" ::"r"(ALIGNED_I16)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse16 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(3, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(4, ALIGNED_I16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(5, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 0x8759, 15, 0x1989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e16, 
m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v8, (%0),v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(6, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec16(ALIGNED_I16); + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse16.v v8, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(7, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec16(ALIGNED_I16); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v4, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v4, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v4, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VVCMP_U16(8, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e8, m1); + 
VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v4, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v4, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse16.v v4, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VVCMP_U16(9, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 16.If LMUL is changed to +// mf8 it will give error because emul become less than 1/8 (EMUL = 1/16) +// But it does not support this configuration because SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(2, e32, mf2); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(10, ALIGNED_I16, 0x05e0, 0xbbd3); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 it will give error because emul become greater than +// 8 (EMUL = 16) +void TEST_CASE11(void) { + reset_vec16(ALIGNED_I16); + VSET(16, e16, m1); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(16, e8, m4); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(11, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + 
reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(12, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e8, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 0, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VSET(16, e16, m2); + VVCMP_U16(13, ALIGNED_I16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE14(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v12, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v12); + VLOAD_16(v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(13, e8, m1); + asm volatile("vse16.v v12, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(14, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e16, m1); + VLOAD_16(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v7, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v7); + VLOAD_16(v7, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(13, e16, m1); + write_csr(vstart, 2); + 
asm volatile("vse16.v v7, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(15, ALIGNED_I16, 1, 2, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + reset_vec16(ALIGNED_I16); + VSET(1024, e16, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(16, ALIGNED_I16, LONG_I16); +} + +void TEST_CASE17(void) { + reset_vec16(ALIGNED_I16); + VSET(512, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v10, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(17, ALIGNED_I16, LONG_I16); +} + +void TEST_CASE18(void) { + reset_vec16(ALIGNED_I16); + VSET(300, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v10, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(18, ALIGNED_I16, LONG_I16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse16.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c new file mode 100644 index 000000000..afd7c6427 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c @@ -0,0 +1,408 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read 
mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec32(volatile uint32_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint32_t ALIGNED_I32[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse32********// +void TEST_CASE1(void) { + VSET(16, e32, m4); + VLOAD_32(v0, 0x9fe41920, 
0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + asm volatile("vse32.v v0, (%0)" ::"r"(ALIGNED_I32)); + VVCMP_U32(1, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +//******Checking functionality with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec32(ALIGNED_I32); + VSET(16, e32, m1); + VLOAD_32(v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(16, e64, m4); + asm volatile("vse32.v v1, (%0)" ::"r"(ALIGNED_I32)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse32 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(3, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +void TEST_CASE4(void) { + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm
volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(4, ALIGNED_I32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(5, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 0x31897598, 15, + 0x89139848); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0),v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(6, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 
0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(7, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(8, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 
0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(9, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 32.If LMUL is changed to +// mf8 and SEW is changed to e64 it will give error because emul become less +// than 1/8 (EMUL = 1/16) But it does not support this configuration because +// SEW/LMUL > ELEN +void TEST_CASE10(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(2, e32, mf2); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(10, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0); +} + +// This test case execute upper bound case of EMUL (8 = EEW/SEW*LMUL = 32/8*2) +// If LMUL is changed to m4 it will give error because emul become greater than +// 8 (EMUL = 16) +void TEST_CASE11(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(16, e8, m2); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(11, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +//******Checking functionality with different values
of vl******// +void TEST_CASE12(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(12, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 0, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VSET(16, e32, m4); + VVCMP_U32(13, ALIGNED_I32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE14(void) { + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(13, e32, m4); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VSET(16, e32, m4); + VVCMP_U32(14, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//******Checking functionality 
with different vstart value*****// +void TEST_CASE15(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m1); + VLOAD_32(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v7, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v7); + VLOAD_32(v7, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(13, e32, m1); + write_csr(vstart, 2); + asm volatile("vse32.v v7, (%0)" ::"r"(ALIGNED_I32)); + VVCMP_U32(15, ALIGNED_I32, 1, 2, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + reset_vec32(ALIGNED_I32); + VSET(1024, e32, m8); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(16, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE17(void) { + reset_vec32(ALIGNED_I32); + VSET(512, e32, m4); + asm volatile("vle32.v v12, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v12, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(17, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE18(void) { + reset_vec32(ALIGNED_I32); + VSET(256, e32, m2); + asm volatile("vle32.v v10, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v10, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(18, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE19(void) { + reset_vec32(ALIGNED_I32); + VSET(200, e32, m2); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(19, ALIGNED_I32, LONG_I32); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse32.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + 
TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c new file mode 100644 index 000000000..da857b854 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c @@ -0,0 +1,435 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm 
volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec64(volatile uint64_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint64_t ALIGNED_I64[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse64 with different destination +// registers********// +void TEST_CASE1(void) { + VSET(16, e64, m8); + VLOAD_64(v0, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + asm volatile("vse64.v v0, (%0)" ::"r"(ALIGNED_I64)); + VVCMP_U64(1, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 
0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(16, e64, m2); // EEW == SEW, so EMUL = LMUL = 2 + asm volatile("vse64.v v1, (%0)" ::"r"(ALIGNED_I64)); // v1 is misaligned for EMUL=2 + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse64 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse64.v v16, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(3, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v16, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse64.v v16, (%0), v0.t"
::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(4, ALIGNED_I64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v16, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse64.v v16, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(5, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 0x3189759837598759, 15, + 0x8913984898951989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0),v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(6, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 
0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(7, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(8, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); 
+} + +void TEST_CASE9(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(9, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover upper bound of EMUL(8). 
If LMUL is changed to +// m2 it will give error because emul become greater than 8 (EMUL = 16) +void TEST_CASE10(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(16, e8, m1); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(10, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(11, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); 
+ asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 0, e64, m1, tu, ma" : [A] "=r"(avl)); // vl = 0 + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VSET(16, e64, m8); + VVCMP_U64(12, ALIGNED_I64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE13(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(13, e64, m8); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VSET(16, e64, m8); + VVCMP_U64(13, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384,
0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(13, e64, m8); + write_csr(vstart, 2); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VVCMP_U64(14, ALIGNED_I64, 1, 2, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, 14, 15, + 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE15(void) { + reset_vec64(ALIGNED_I64); + VSET(512, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(15, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE16(void) { + reset_vec64(ALIGNED_I64); + VSET(256, e64, m4); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(16, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE17(void) { + reset_vec64(ALIGNED_I64); + VSET(128, e64, m2); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(17, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE18(void) { + reset_vec64(ALIGNED_I64); + VSET(100, e64, m2); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(18, ALIGNED_I64, LONG_I64); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse64.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + 
TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c new file mode 100644 index 000000000..0d74bde5d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c @@ -0,0 +1,331 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 
208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec8(volatile uint8_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint8_t ALIGNED_I8[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse8 ********// +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + asm volatile("vse8.v v0, (%0)" ::"r"(ALIGNED_I8)); + VVCMP_U8(1, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//******Checking functionality with an illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause an illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v1, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(16, e16, m4); + asm volatile("vse8.v v1, (%0)" ::"r"(ALIGNED_I8)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse8 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(3, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 
0x89); +} + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(4, ALIGNED_I8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(5, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 0x59, 15, 0x89); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0),v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(6, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); 
+ VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(7, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(8, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(9, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case execute lower bound case of EMUL (1/8). 
If LMUL is changed to +// mf4 or mf8 it will give an error because EMUL becomes out of range +void TEST_CASE10(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v5, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(2, e32, mf2); + asm volatile("vse8.v v5, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v5); + VVCMP_U8(10, ALIGNED_I8, 0xe0, 0xd3); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VVCMP_U8(11, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 0, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VSET(16, e8, m1); + VVCMP_U8(12, ALIGNED_I8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE13(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(13, e8, m1); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VVCMP_U8(13, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + 
reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v7, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v7); + VLOAD_8(v7, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(13, e8, m1); + write_csr(vstart, 2); + asm volatile("vse8.v v7, (%0)" ::"r"(ALIGNED_I8)); + write_csr(vstart, 0); + VVCMP_U8(14, ALIGNED_I8, 1, 2, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// + +void TEST_CASE15(void) { + reset_vec8(ALIGNED_I8); + VSET(1024, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + asm volatile("vse8.v v8, (%0)" ::"r"(ALIGNED_I8)); + LVVCMP_U8(15, ALIGNED_I8, LONG_I8); +} + +void TEST_CASE16(void) { + reset_vec8(ALIGNED_I8); + VSET(800, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + asm volatile("vse8.v v8, (%0)" ::"r"(ALIGNED_I8)); + LVVCMP_U8(16, ALIGNED_I8, LONG_I8); +} +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse8.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c new file mode 100644 index 000000000..017feaacf --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c @@ -0,0 +1,466 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +//***********LMUL = 1**********// +void TEST_CASE1(void) { + uint64_t avl, vtype, + vl; // Declaring avl, vtype and vl variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations + __asm__ volatile("vsetivli %[A], 30, e8, m1, ta, ma" + : [A] "=r"(avl)); // Executing vsetivli instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1, vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +void TEST_CASE2(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE3(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE4(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],16, e64, 
m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +void TEST_CASE5(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],30, e8, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE6(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e16, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE7(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE8(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],16, e64, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +void TEST_CASE9(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + 
uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],30, e8, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE10(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e16, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE11(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE12(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e64, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +void TEST_CASE13(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 30, e8, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE14(void) { + uint64_t avl, vtype, vl; + 
uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE15(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE16(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e64, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +void TEST_CASE17(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e8, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE18(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e16,mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void 
TEST_CASE19(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 5, e32, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE20(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],7, e64, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +void TEST_CASE21(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e8, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE22(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e16, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE23(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],5, e32, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, 
golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE24(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],15, e64, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +void TEST_CASE25(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e8, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE26(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE27(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e32, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE28(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 30, e64, mf2,ta,ma" : [A] "=r"(avl)); + 
read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("************* Running Test for vsetivli *************\n"); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c new file mode 100644 index 000000000..b238ecfc6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c @@ -0,0 +1,526 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include + +#include "vector_macros.h" +// Define VLEN before compiling me +#define VLEN 128 +//***********LMUL = 1**********// +void TEST_CASE1(void) { + uint64_t vtype, vl; // Declaring vtype and vl variables to pass + // for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations + __asm__ volatile("vsetvl t0, %[A], %[B]" ::[A] "r"(avl), + [B] "r"(golden_vtype)); // Executing vsetvl instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1, vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +void TEST_CASE2(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE3(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, 
vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE4(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +void TEST_CASE5(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE6(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE7(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE8(void) 
{ + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +void TEST_CASE9(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE10(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE11(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE12(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + 
uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +void TEST_CASE13(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE14(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE15(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE16(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t 
golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +void TEST_CASE17(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE18(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE19(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE20(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); 
+ __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +void TEST_CASE21(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + uint64_t avl = ((VLEN / (8 << vsew)) / 4) - 1; + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE22(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE23(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE24(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); 
+ check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +void TEST_CASE25(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE26(void) { // vsetvl with vlmul = 7, vsew = 1 and an avl one below (VLEN / (8 << vsew)) / 2 + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); // Build the reference vtype exactly as the sibling cases do; the "- 1" belongs to avl only + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE27(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE28(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); +
enable_vec(); + + printf("************* Running Test for vsetvl *************\n"); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c new file mode 100644 index 000000000..27689fcd8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c @@ -0,0 +1,528 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include + +#include "vector_macros.h" +// Define VLEN before compiling me +#define VLEN 128 + +//***********LMUL = 1**********// + +//****** SEW = 8 +void TEST_CASE1(void) { + uint64_t vtype, vl; // Declaring vtype and vl, which receive the CSR + // values read back below for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; // Setting avl, the length passed to vsetvli + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations + __asm__ volatile("vsetvli t0, %[A], e8, m1,ta,ma" ::[A] "r"( + avl)); // Executing vsetvli instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1,
vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +//****** SEW = 16 +void TEST_CASE2(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE3(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE4(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +//****** SEW = 8 +void TEST_CASE5(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5,
vtype, golden_vtype, avl, vl, vsew, vlmul); +} +//****** SEW = 16 +void TEST_CASE6(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE7(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE8(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +//****** SEW = 8 +void TEST_CASE9(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 
+void TEST_CASE10(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE11(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE12(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +//****** SEW = 8 +void TEST_CASE13(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE14(void) { + uint64_t vtype, vl; + 
uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE15(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE16(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +//****** SEW = 8 +void TEST_CASE17(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE18(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + 
uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16,mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE19(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE20(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +//****** SEW = 8 +void TEST_CASE21(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE22(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, 
vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE23(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE24(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +//****** SEW = 8 +void TEST_CASE25(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE26(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, mf2,ta,ma" ::[A] "r"(avl)); + 
read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE27(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE28(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("************* Running Test for vsetvli *************\n"); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c new file mode 100644 index 000000000..75b584b65 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c @@ -0,0 +1,106 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(8, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + asm volatile("vsext.vf2 v2, v1"); + VCMP_U16(1, v2, 1, 2, -3, -4, 5, 6, -7, -8); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + asm volatile("vsext.vf2 v0, v1"); + VCMP_U32(2, v0, 1, 2, -3, -4); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + asm volatile("vsext.vf2 v0, v1"); + VCMP_U64(3, v0, 1, 2); +} + +void TEST_CASE2(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + VLOAD_8(v0, 0xAA); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U16(4, v2, 0, 2, 0, -4, 0, 6, 0, -8); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U32(5, v2, 0, 2, 0, -4); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U64(6, v2, 0, 2); +} + +void TEST_CASE3(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + asm volatile("vsext.vf4 v2, v1"); + VCMP_U32(7, v2, 1, 2, -3, -4); + + VSET(8, e64, m1); + VLOAD_16(v1, 1, 2); + asm volatile("vsext.vf4 v2, v1"); + VCMP_U64(8, v2, 1, 2); +} + +void TEST_CASE4(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vsext.vf4 v2, v1, v0.t"); + VCMP_U32(9, v2, 0, 2, 0, -4); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf4 v2, v1, v0.t"); + VCMP_U64(10, v2, 0, 2); +} + +void TEST_CASE5(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + asm volatile("vsext.vf8 v2, v1"); + VCMP_U64(11, v2, 1, 2); +} + +void TEST_CASE6(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf8 v2, v1, v0.t"); + VCMP_U64(12, v2, 0, 2); +} + +int main(void) { + INIT_CHECK(); + 
enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c new file mode 100644 index 000000000..743640abc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + uint64_t scalar = 99; + + VSET(32, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(32, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(32, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx 
v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v4, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 2, 3, 4, 5, 6, 7, 8, 9, 99); +} + +void TEST_CASE2() { + uint64_t scalar = 99; + + VSET(32, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1down.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); + + VSET(32, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v2, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); + + VSET(32, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1down.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v4, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 
99); + + VSET(32, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c new file mode 100644 index 000000000..029a17850 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + uint64_t scalar = 99; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v1, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v4, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +void TEST_CASE2() { + uint64_t scalar = 99; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1up.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 
0x55); + asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v2, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1up.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v4, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c new file mode 100644 index 000000000..8d1d4a1e7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c @@ -0,0 +1,164 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { // Unmasked vslidedown.vi at e8/e16/e32/e64 with vl = 9 + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); // Nine fill values, one per element at vl = 9, as in the e16/e32/e64 cases below + asm volatile("vslidedown.vi v1, v2, 3"); + VCMP_U8(1, v1, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v2, v4, 4"); + VCMP_U16(2, v2, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v4, v8, 5"); + VCMP_U32(3, v4, 6, 7, 8, 9, 10, 11, 12, 13, 14); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 6"); + VCMP_U64(4, v8, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vi v1, v2, 3, v0.t"); + VCMP_U8(5, v1, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v2, v4, 4, v0.t"); + VCMP_U16(6, v2, -1, 6, -1, 8, -1, 10, -1, 12, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v4, v8, 5, v0.t"); + VCMP_U32(7, v4, -1, 7, -1,
9, -1, 11, -1, 13, -1); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 6, v0.t"); + VCMP_U64(8, v8, -1, 8, -1, 10, -1, 12, -1, 14, -1); +} + +void TEST_CASE3() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12); +} + +void TEST_CASE4() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 
0xAA, 0xAA); + asm volatile("vslidedown.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1); +} + +// Corner case: NrLanes divides vl, but the stride requires the operand +// requester to request an additional 64-bit packet per lane, and not only an +// additional 32-bit element per lane. Otherwise, it gets stuck +void TEST_CASE5() { + VSET(32, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(9, e32, m8); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 7"); + VCMP_U32(17, v8, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c new file mode 100644 index 000000000..a33aae101 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c @@ -0,0 +1,166 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // Unmasked vslideup.vi; elements below the offset are expected to keep -1.
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v1, v2, 3");
+  VCMP_U8(1, v1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v2, v4, 4");
+  VCMP_U16(2, v2, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v4, v8, 5");
+  VCMP_U32(3, v4, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v8, v16, 6");
+  VCMP_U64(4, v8, -1, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+}
+
+void TEST_CASE2() { // Masked vslideup.vi (v0.t); the mask is loaded once and reused.
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA); // 0xAA = 0b10101010: odd-indexed elements active
+  asm volatile("vslideup.vi v1, v2, 3, v0.t");
+  VCMP_U8(5, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v2, v4, 4, v0.t");
+  VCMP_U16(6, v2, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v4, v8, 5, v0.t");
+  VCMP_U32(7, v4, -1, -1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v8, v16, 6, v0.t");
+  VCMP_U64(8, v8, -1, -1, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10);
+}
+
+void TEST_CASE3() { // Unmasked vslideup.vx; the offset comes from a scalar register.
+  uint64_t scalar = 3;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(9, v1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(10, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(11, v4, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(12, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
+}
+
+void TEST_CASE4() { // Masked vslideup.vx (v0.t); the mask is reloaded before every sub-test.
+  uint64_t scalar = 3;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vslideup.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(13, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(14, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vslideup.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(15, v4, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA); // odd elements active, as the expected vector below requires
+  asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(16, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13);
+}
+
+// Stress the masked VSLIDEUP to enforce that the used mask bit indices should
+// follow the output vector element indices and not the input ones
+void TEST_CASE5() {
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0x80);
+  asm volatile("vslideup.vi v1, v2, 3, v0.t");
+  VCMP_U8(17, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, 13);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v2, v4, 4, v0.t");
+  VCMP_U16(18, v2, -1, -1, -1, -1, -1, 2, -1, 4, -1, -1, -1, -1, -1, -1, -1,
+           12);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v4, v8, 5, v0.t");
+  VCMP_U32(19, v4, -1, -1, -1, -1, -1, 1, -1, 3, -1, -1, -1, -1, -1, -1, -1,
+           11);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vslideup.vi v8, v16, 6, v0.t");
+  VCMP_U64(20, v8, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1,
+           10);
+}
+
+int main(void) { // Runs TEST_CASE1-4; TEST_CASE5 is left disabled, as in the original.
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  // TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c
new file mode 100644
index 000000000..5ff5e8f20
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c
@@ -0,0 +1,316 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) { // Unmasked vsll.vv; shift amounts >= SEW are expected to wrap (32 -> 0 at e8).
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+          0x01, 0x01, 0x01, 0x01, 0x01);
+  VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  asm volatile("vsll.vv v4, v2, v3");
+  VCMP_U8(1, v4, 0x01, 0x02, 0x04, 0x08, 0x80, 0x80, 0x80, 0x01, 0x01, 0x02,
+          0x04, 0x08, 0x80, 0x80, 0x80, 0x01);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
+           0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001);
+  VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  asm volatile("vsll.vv v6, v2, v4");
+  VCMP_U16(2, v6, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000,
+           0x0001, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000,
+           0x0001);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001);
+  VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  asm volatile("vsll.vv v12, v4, v8");
+  VCMP_U32(3, v12, 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000080,
+           0x00008000, 0x80000000, 0x00000001, 0x00000001, 0x00000002,
+           0x00000004, 0x00000008, 0x00000080, 0x00008000, 0x80000000,
+           0x00000001);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001);
+  VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  asm volatile("vsll.vv v24, v8, v16");
+  VCMP_U64(4, v24, 0x0000000000000001, 0x0000000000000002, 0x0000000000000004,
+           0x0000000000000008, 0x0000000000000080, 0x0000000000008000,
+           0x0000000080000000, 0x0000000100000000, 0x0000000000000001,
+           0x0000000000000002, 0x0000000000000004, 0x0000000000000008,
+           0x0000000000000080, 0x0000000000008000, 0x0000000080000000,
+           0x0000000100000000);
+}
+
+void TEST_CASE2(void) { // Masked vsll.vv (mask 0xAA); inactive elements are expected to stay 0 after VCLEAR.
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+          0x01, 0x01, 0x01, 0x01, 0x01);
+  VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v4);
+  asm volatile("vsll.vv v4, v2, v3, v0.t");
+  VCMP_U8(5, v4, 0x00, 0x02, 0x00, 0x08, 0x00, 0x80, 0x00, 0x01, 0x00, 0x02,
+          0x00, 0x08, 0x00, 0x80, 0x00, 0x01);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
+           0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001);
+  VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v8);
+  asm volatile("vsll.vv v8, v2, v4, v0.t");
+  VCMP_U16(6, v8, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 0x0000,
+           0x0001, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 0x0000,
+           0x0001);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
+           0x00000001);
+  VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v12);
+  asm volatile("vsll.vv v12, v4, v8, v0.t");
+  VCMP_U32(7, v12, 0x00000000, 0x00000002, 0x00000000, 0x00000008, 0x00000000,
+           0x00008000, 0x00000000, 0x00000001, 0x00000000, 0x00000002,
+           0x00000000, 0x00000008, 0x00000000, 0x00008000, 0x00000000,
+           0x00000001);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001, 0x0000000000000001, 0x0000000000000001,
+           0x0000000000000001);
+  VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  VCLEAR(v24);
+  asm volatile("vsll.vv v24, v8, v16, v0.t");
+  VCMP_U64(8, v24, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000,
+           0x0000000000000008, 0x0000000000000000, 0x0000000000008000,
+           0x0000000000000000, 0x0000000100000000, 0x0000000000000000,
+           0x0000000000000002, 0x0000000000000000, 0x0000000000000008,
+           0x0000000000000000, 0x0000000000008000, 0x0000000000000000,
+           0x0000000100000000);
+}
+
+void TEST_CASE3(void) { // Unmasked vsll.vx; shift amount 2 comes from a scalar register.
+  const uint64_t scalar = 2;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA,
+          0xFB, 0xFC, 0xFD, 0xFE, 0xFF);
+  asm volatile("vsll.vx v4, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(9, v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4,
+          0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+           0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF);
+  asm volatile("vsll.vx v4, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(10, v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C,
+           0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8,
+           0x03FC);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005,
+           0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9,
+           0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE,
+           0x000000FF);
+  asm volatile("vsll.vx v8, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(11, v8, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014,
+           0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4,
+           0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8,
+           0x000003FC);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003,
+           0x0000000000000004, 0x0000000000000005, 0x0000000000000006,
+           0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8,
+           0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB,
+           0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE,
+           0x00000000000000FF);
+  asm volatile("vsll.vx v16, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(12, v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C,
+           0x0000000000000010, 0x0000000000000014, 0x0000000000000018,
+           0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0,
+           0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC,
+           0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8,
+           0x00000000000003FC);
+}
+
+void TEST_CASE4(void) { // Masked vsll.vx (mask 0xAA), shift amount 2 from a scalar register.
+  const uint64_t scalar = 2;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA,
+          0xFB, 0xFC, 0xFD, 0xFE, 0xFF);
+  VCLEAR(v4);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(13, v4, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4,
+          0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+           0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF);
+  VCLEAR(v4);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(14, v4, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000,
+           0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000,
+           0x03FC);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005,
+           0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9,
+           0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE,
+           0x000000FF);
+  VCLEAR(v8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(15, v8, 0x00000000, 0x00000008, 0x00000000, 0x00000010, 0x00000000,
+           0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4,
+           0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000,
+           0x000003FC);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003,
+           0x0000000000000004, 0x0000000000000005, 0x0000000000000006,
+           0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8,
+           0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB,
+           0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE,
+           0x00000000000000FF);
+  VCLEAR(v16);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000,
+           0x0000000000000010, 0x0000000000000000, 0x0000000000000018,
+           0x0000000000000000, 0x0000000000000020, 0x0000000000000000,
+           0xFFFFFFFFFFFFFFE4, 0x0000000000000000, 0xFFFFFFFFFFFFFFEC,
+           0x0000000000000000, 0xFFFFFFFFFFFFFFF4, 0x0000000000000000,
+           0x00000000000003FC);
+}
+
+void TEST_CASE5(void) { // Unmasked vsll.vi with immediate shift amount 2.
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA,
+          0xFB, 0xFC, 0xFD, 0xFE, 0xFF);
+  asm volatile("vsll.vi v4, v2, 2");
+  VCMP_U8(17, v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4,
+          0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+           0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF);
+  asm volatile("vsll.vi v4, v2, 2");
+  VCMP_U16(18, v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C,
+           0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8,
+           0x03FC);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005,
+           0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9,
+           0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE,
+           0x000000FF);
+  asm volatile("vsll.vi v8, v4, 2");
+  VCMP_U32(19, v8, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014,
+           0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4,
+           0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8,
+           0x000003FC);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003,
+           0x0000000000000004, 0x0000000000000005, 0x0000000000000006,
+           0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8,
+           0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB,
+           0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE,
+           0x00000000000000FF);
+  asm volatile("vsll.vi v16, v8, 2");
+  VCMP_U64(20, v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C,
+           0x0000000000000010, 0x0000000000000014, 0x0000000000000018,
+           0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0,
+           0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC,
+           0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8,
+           0x00000000000003FC);
+}
+
+void TEST_CASE6(void) { // Masked vsll.vi (mask 0xAA) with immediate shift amount 2.
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA,
+          0xFB, 0xFC, 0xFD, 0xFE, 0xFF);
+  VCLEAR(v4);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vi v4, v2, 2, v0.t");
+  VCMP_U8(21, v4, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4,
+          0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+           0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF);
+  VCLEAR(v4);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vi v4, v2, 2, v0.t");
+  VCMP_U16(22, v4, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000,
+           0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000,
+           0x03FC);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005,
+           0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9,
+           0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE,
+           0x000000FF);
+  VCLEAR(v8);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vi v8, v4, 2, v0.t");
+  VCMP_U32(23, v8, 0x00000000, 0x00000008, 0x00000000, 0x00000010, 0x00000000,
+           0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4,
+           0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000,
+           0x000003FC);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003,
+           0x0000000000000004, 0x0000000000000005, 0x0000000000000006,
+           0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8,
+           0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB,
+           0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE,
+           0x00000000000000FF);
+  VCLEAR(v16);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsll.vi v16, v8, 2, v0.t");
+  VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000,
+           0x0000000000000010, 0x0000000000000000, 0x0000000000000018,
+           0x0000000000000000, 0x0000000000000020, 0x0000000000000000,
+           0xFFFFFFFFFFFFFFE4, 0x0000000000000000, 0xFFFFFFFFFFFFFFEC,
+           0x0000000000000000, 0xFFFFFFFFFFFFFFF4, 0x0000000000000000,
+           0x00000000000003FC);
+}
+
+int main(void) { // Runs every test case between INIT_CHECK() and EXIT_CHECK().
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+  TEST_CASE6();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c
new file mode 100644
index 000000000..0b97f2d4e
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c
@@ -0,0 +1,59 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // Unmasked vsmul.vv at e8, vl = 3.
+  VSET(3, e8, m1);
+  VLOAD_8(v2, 127, 127, -50);
+  VLOAD_8(v3, 127, 10, 127);
+  __asm__ volatile("csrw vxrm, 2"); // vxrm = 2 selects round-down (rdn) in the RVV spec
+  __asm__ volatile("vsmul.vv v1, v2, v3");
+  VCMP_I8(1, v1, 126, 9, -50);
+}
+
+void TEST_CASE2() { // Masked vsmul.vv; mask 5 = 0b101 leaves element 1 at its cleared value.
+  VSET(3, e8, m1);
+  VLOAD_8(v2, 127, 127, -50);
+  VLOAD_8(v3, 127, 10, 127);
+  VLOAD_8(v0, 5, 0, 0);
+  VCLEAR(v1);
+  __asm__ volatile("csrw vxrm, 2");
+  __asm__ volatile("vsmul.vv v1, v2, v3, v0.t");
+  VCMP_I8(2, v1, 126, 0, -50);
+}
+
+void TEST_CASE3() { // Unmasked vsmul.vx; the multiplier (55) comes from a scalar register.
+  VSET(3, e8, m1);
+  VLOAD_8(v2, 127, 63, -50);
+  int8_t scalar = 55;
+  __asm__ volatile("csrw vxrm, 2");
+  __asm__ volatile("vsmul.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_I8(3, v1, 54, 27, -22);
+}
+
+void TEST_CASE4() { // Masked vsmul.vx with the same 0b101 mask as TEST_CASE2.
+  VSET(3, e8, m1);
+  VLOAD_8(v2, 127, 127, -50);
+  int8_t scalar = 55;
+  VLOAD_8(v0, 5, 0, 0);
+  VCLEAR(v1);
+  __asm__ volatile("csrw vxrm, 2");
+  __asm__ volatile("vsmul.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I8(4, v1, 54, 0, -22);
+}
+
+int main(void) { // Runs every test case between INIT_CHECK() and EXIT_CHECK().
+  INIT_CHECK();
+  enable_vec();
+  enable_fp();
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c
new file mode 100644
index 000000000..8cfcdab47
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c
@@ -0,0 +1,316 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v4, v2, v3"); + VCMP_U8(1, v4, 0x80, 0xC0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0xC0, + 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v6, v2, v4"); + VCMP_U16(2, v6, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, + 0x8000, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v12, v4, v8"); + VCMP_U32(3, v12, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xFF000000, + 0xFFFF0000, 0xFFFFFFFF, 0x80000000, 0x80000000, 0xC0000000, + 0xE0000000, 0xF0000000, 0xFF000000, 0xFFFF0000, 0xFFFFFFFF, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v24, v8, v16"); + VCMP_U64(4, v24, 
0x8000000000000000, 0xC000000000000000, 0xE000000000000000, + 0xF000000000000000, 0xFF00000000000000, 0xFFFF000000000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF80000000, 0x8000000000000000, + 0xC000000000000000, 0xE000000000000000, 0xF000000000000000, + 0xFF00000000000000, 0xFFFF000000000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF80000000); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vv v4, v2, v3, v0.t"); + VCMP_U8(5, v4, 0x00, 0xC0, 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80, 0x00, 0xC0, + 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsra.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, + 0x8000, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsra.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00000000, 0xC0000000, 0x00000000, 0xF0000000, 0x00000000, + 0xFFFF0000, 0x00000000, 0x80000000, 0x00000000, 0xC0000000, + 0x00000000, 0xF0000000, 0x00000000, 0xFFFF0000, 0x00000000, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsra.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x0000000000000000, 0xC000000000000000, 0x0000000000000000, + 0xF000000000000000, 0x0000000000000000, 0xFFFF000000000000, + 0x0000000000000000, 0xFFFFFFFF80000000, 0x0000000000000000, + 0xC000000000000000, 0x0000000000000000, 0xF000000000000000, + 0x0000000000000000, 0xFFFF000000000000, 0x0000000000000000, + 0xFFFFFFFF80000000); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsra.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsra.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); 
+ + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsra.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x00000000, 
0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsra.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0xFFFFFFFFFFFFFFF9, 0x0000000000000000, 0xFFFFFFFFFFFFFFFB, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFD, 0x0000000000000000, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsra.vi v4, v2, 2"); + VCMP_U8(17, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsra.vi v4, v2, 2"); + VCMP_U16(18, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsra.vi v8, v4, 2"); + VCMP_U32(19, v8, 0x00000001, 
0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsra.vi v16, v8, 2"); + VCMP_U64(20, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vi v4, v2, 2, v0.t"); + VCMP_U8(21, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vi v4, v2, 2, v0.t"); + VCMP_U16(22, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vsra.vi v8, v4, 2, v0.t"); + VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsra.vi v16, v8, 2, v0.t"); + VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0xFFFFFFFFFFFFFFF9, 0x0000000000000000, 0xFFFFFFFFFFFFFFFB, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFD, 0x0000000000000000, + 0xFFFFFFFFFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c new file mode 100644 index 000000000..4f7aa571f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c @@ -0,0 +1,316 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v4, v2, v3"); + VCMP_U8(1, v4, 0x80, 0x40, 0x20, 0x10, 0x01, 0x01, 0x01, 0x80, 0x80, 0x40, + 0x20, 0x10, 0x01, 0x01, 0x01, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v6, v2, v4"); + VCMP_U16(2, v6, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, + 0x8000, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v12, v4, v8"); + VCMP_U32(3, v12, 0x80000000, 0x40000000, 0x20000000, 0x10000000, 0x01000000, + 0x00010000, 0x00000001, 0x80000000, 0x80000000, 0x40000000, + 0x20000000, 0x10000000, 0x01000000, 0x00010000, 0x00000001, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v24, v8, v16"); + VCMP_U64(4, v24, 
0x8000000000000000, 0x4000000000000000, 0x2000000000000000, + 0x1000000000000000, 0x0100000000000000, 0x0001000000000000, + 0x0000000100000000, 0x0000000080000000, 0x8000000000000000, + 0x4000000000000000, 0x2000000000000000, 0x1000000000000000, + 0x0100000000000000, 0x0001000000000000, 0x0000000100000000, + 0x0000000080000000); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vv v4, v2, v3, v0.t"); + VCMP_U8(5, v4, 0x00, 0x40, 0x00, 0x10, 0x00, 0x01, 0x00, 0x80, 0x00, 0x40, + 0x00, 0x10, 0x00, 0x01, 0x00, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsrl.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, + 0x8000, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsrl.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00000000, 0x40000000, 0x00000000, 0x10000000, 0x00000000, + 0x00010000, 0x00000000, 0x80000000, 0x00000000, 0x40000000, + 0x00000000, 0x10000000, 0x00000000, 0x00010000, 0x00000000, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsrl.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x0000000000000000, 0x4000000000000000, 0x0000000000000000, + 0x1000000000000000, 0x0000000000000000, 0x0001000000000000, + 0x0000000000000000, 0x0000000080000000, 0x0000000000000000, + 0x4000000000000000, 0x0000000000000000, 0x1000000000000000, + 0x0000000000000000, 0x0001000000000000, 0x0000000000000000, + 0x0000000080000000); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsrl.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsrl.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 0x3FFFFFF9, + 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, + 0x3FFFFFFF); 
+ + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsrl.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, + 0x3FFFFFFFFFFFFFF9, 0x3FFFFFFFFFFFFFFA, 0x3FFFFFFFFFFFFFFB, + 0x3FFFFFFFFFFFFFFC, 0x3FFFFFFFFFFFFFFD, 0x3FFFFFFFFFFFFFFE, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, + 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x00000000, 
0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, + 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsrl.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x3FFFFFFFFFFFFFF9, 0x0000000000000000, 0x3FFFFFFFFFFFFFFB, + 0x0000000000000000, 0x3FFFFFFFFFFFFFFD, 0x0000000000000000, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsrl.vi v4, v2, 2"); + VCMP_U8(17, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsrl.vi v4, v2, 2"); + VCMP_U16(18, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsrl.vi v8, v4, 2"); + VCMP_U32(19, v8, 0x00000001, 
0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 0x3FFFFFF9, + 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsrl.vi v16, v8, 2"); + VCMP_U64(20, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, + 0x3FFFFFFFFFFFFFF9, 0x3FFFFFFFFFFFFFFA, 0x3FFFFFFFFFFFFFFB, + 0x3FFFFFFFFFFFFFFC, 0x3FFFFFFFFFFFFFFD, 0x3FFFFFFFFFFFFFFE, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vi v4, v2, 2, v0.t"); + VCMP_U8(21, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, + 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vi v4, v2, 2, v0.t"); + VCMP_U16(22, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vsrl.vi v8, v4, 2, v0.t"); + VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, + 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsrl.vi v16, v8, 2, v0.t"); + VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x3FFFFFFFFFFFFFF9, 0x0000000000000000, 0x3FFFFFFFFFFFFFFB, + 0x0000000000000000, 0x3FFFFFFFFFFFFFFD, 0x0000000000000000, + 0x3FFFFFFFFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c new file mode 100644 index 000000000..fa4d3f709 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c @@ -0,0 +1,146 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Positive-stride tests +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + uint64_t stride = 3; + VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vsse8.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U8(1, OUT1, 0x9f, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x19, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); +} + +void TEST_CASE2(void) { + VSET(8, e16, m1); + volatile uint16_t OUT1[] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000}; + uint64_t stride = 4; + VLOAD_16(v1, 0x9f11, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220); + asm volatile("vsse16.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U16(2, OUT1, 0x9f11, 0x0000, 0xe478, 0x0000, 0x1549, 0x0000, 0x3240, + 0x0000, 0x2f11, 0x0000, 0xe448, 0x0000, 0x1546, 0x0000, 0x3220, + 0x0000); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t OUT1[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000}; + uint64_t stride = 8; + VLOAD_32(v1, 0x9f872456, 0xe1356784, 0x13241139, 0x20862497); + asm volatile("vsse32.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U32(3, OUT1, 0x9f872456, 0x00000000, 0xe1356784, 0x00000000, 0x13241139, + 0x00000000, 0x20862497, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + volatile uint64_t OUT1[] = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 
0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + uint64_t stride = 16; + VLOAD_64(v8, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139, + 0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff, + 0xaf87245315434136, 0xa135578794246784, 0x2315345345241139, + 0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4, + 0x9315345345241139, 0x9086252110062497, 0x9100229933847134, + 0x9affaaffaaffaaf4); + asm volatile("vsse64.v v8, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U64(4, OUT1, 0x9f87245315434136, 0x0000000000000000, 0xe135578794246784, + 0x0000000000000000, 0x1315345345241139, 0x0000000000000000, + 0x2086252110062497, 0x0000000000000000, 0x1100229933847136, + 0x0000000000000000, 0xaaffaaffaaffaaff, 0x0000000000000000, + 0xaf87245315434136, 0x0000000000000000, 0xa135578794246784, + 0x0000000000000000, 0x2315345345241139, 0x0000000000000000, + 0x1086252110062497, 0x0000000000000000, 0x1100229933847134, + 0x0000000000000000, 0xaaffaaffaaffaaf4, 0x0000000000000000, + 0x9315345345241139, 0x0000000000000000, 0x9086252110062497, + 0x0000000000000000, 0x9100229933847134, 0x0000000000000000, + 0x9affaaffaaffaaf4, 0x0000000000000000); +} + +// Masked strided store +void TEST_CASE5(void) { + VSET(4, e8, m1); + volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + uint64_t stride = 3; + VLOAD_8(v0, 0xAA); + VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vsse8.v v1, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride)); + VVCMP_U8(5, OUT1, 0x00, 
0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); +} + +void TEST_CASE6(void) { + VSET(16, e64, m8); + volatile uint64_t OUT1[] = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + uint64_t stride = 16; + VLOAD_64(v8, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139, + 0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff, + 0xaf87245315434136, 0xa135578794246784, 0x2315345345241139, + 0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4, + 0x9315345345241139, 0x9086252110062497, 0x9100229933847134, + 0x9affaaffaaffaaf4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsse64.v v8, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride)); + VVCMP_U64(6, OUT1, 0x0000000000000000, 0x0000000000000000, 0xe135578794246784, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x2086252110062497, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xaaffaaffaaffaaff, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0xa135578794246784, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x1086252110062497, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xaaffaaffaaffaaf4, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x9086252110062497, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x9affaaffaaffaaf4, 0x0000000000000000); +} + +int 
main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c new file mode 100644 index 000000000..a21d6aaf5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vv v1, v2, v3"); + VCMP_I8(1, v1, 0xff, 0, 0xfe, 0); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vv v1, v2, v3, v0.t"); + VCMP_I8(2, v1, 0xff, 0, 0xfe, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vi v1, v2, 2"); + VCMP_I8(3, v1, 0xff, 0, 0xfc, 3); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vi v1, v2, 2, v0.t"); + VCMP_I8(4, v1, 0xff, 0, 0xfc, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 2; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(5, v1, 0xff, 0, 0xfc, 3); +} + +void TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 2; + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + 
__asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(6, v1, 0xff, 0, 0xfc, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c new file mode 100644 index 000000000..de73e9fba --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x7f, 0, 0x1e, 0x00); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vv v1, v2, v3, v0.t"); + VCMP_U8(2, v1, 0x7f, 0, 0x1e, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vi v1, v2, 5"); + VCMP_U8(3, v1, 7, 0, 7, 0); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vi v1, v2, 5, v0.t"); + VCMP_U8(4, v1, 7, 0, 7, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 5; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(5, v1, 7, 0, 7, 0); +} + +void 
TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 5; + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(6, v1, 7, 0, 7, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} \ No newline at end of file diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c new file mode 100644 index 000000000..8b2c1f538 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c @@ -0,0 +1,55 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 0xfffffff0, 0x7FFFFFFC, 15, 20); + VLOAD_32(v2, 0x7ffffff0, -500, 3, 25); + __asm__ volatile("vssub.vv v3, v1, v2" ::); + VEC_CMP_32(1, v3, 0x80000000, 0x7fffffff, 12, -5); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 0xfffffff0, 0x7FFFFFFC, 15, 20); + VLOAD_32(v2, 0x7ffffff0, -500, 3, 25); + VLOAD_32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssub.vv v3, v1, v2, v0.t" ::); + VEC_CMP_32(1, v3, 0, 0x7fffffff, 0, -5); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, -2147483645, 15, 20); + const int64_t scalar = 5; + __asm__ volatile("vssub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_32(3, v3, 0, 0x80000000, 10, 15); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, -2147483645, 15, 20); + const int64_t scalar = 5; + VLOAD_32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_32(4, v3, 0, 0x80000000, 0, 15); +} + 
+int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c new file mode 100644 index 000000000..8a7bb9cec --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c @@ -0,0 +1,55 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 25); + __asm__ volatile("vssubu.vv v3, v1, v2" ::); + VEC_CMP_U32(1, v3, 4, 8, 12, 0); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 120); + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssubu.vv v3, v1, v2, v0.t" ::); + VEC_CMP_U32(2, v3, 0, 8, 0, 0); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 1, 15, 20); + const uint64_t scalar = 5; + __asm__ volatile("vssubu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_U32(3, v3, 0, 0, 10, 15); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 1, 15, 20); + const uint64_t scalar = 5; + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssubu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_U32(4, v3, 0, 0, 0, 15); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c new file mode 100644 index 000000000..d50029719 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c @@ -0,0 +1,136 @@ +// Copyright 2021 ETH Zurich and University of 
Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v3, v1, v2"); + VCMP_U8(1, v3, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v6, v2, v4"); + VCMP_U16(2, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v12, v4, v8"); + VCMP_U32(3, v12, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, v8, v16"); + VCMP_U64(4, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vsub.vv v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsub.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0, 8, 0, 16, 0, 24, 0, 32, 
0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsub.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U32(8, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); +} + +void TEST_CASE3(void) { + const uint64_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v3, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); +} + +void TEST_CASE4(void) { + const uint64_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vsub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v3, 0, 5, 0, 15, 0, 25, 0, 
35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c new file mode 100644 index 000000000..5f21ec80d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c @@ -0,0 +1,104 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; + VLOAD_U8(v1, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("vsuxei8.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U8_RAW(1, OUP, 0xff, 0x00, 0xf0, 0x0f); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_U8(v2,0,1,2,3); +// volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; +// VLOAD_U8(v1,0xff,0x00,0xf0,0x0f); +// VLOAD_U8(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei8.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_U8_RAW(2,OUP,0xef,0x00,0xef,0xef); +// } + +void TEST_CASE3(void) { + VSET(4, e16, m1); + VLOAD_U16(v2, 0, 2, 4, 6); + volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; + VLOAD_U16(v1, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + __asm__ volatile("vsuxei16.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U16_RAW(3, OUP, 0xffff, 0x0000, 0xf0f0, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(4,e16,m1); +// VLOAD_U16(v2,0,2,4,6); +// volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; +// VLOAD_U16(v1,0xffff,0x0000,0xf0f0,0x0f0f); +// VLOAD_U16(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei16.v v1, (%0), v2, v0.t"::"r"(OUP)); +// MEMBARRIER; +// VEC_EQUAL_U16_RAW(4,OUP,0xdead,0x0000,0xdead,0xbeef); +// } + +void TEST_CASE5(void) { + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; + VLOAD_U32(v1, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); + __asm__ volatile("vsuxei32.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U32_RAW(5, OUP, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_U32(v2,0,4,8,12); +// volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; +// VLOAD_U32(v1,0xffffffff,0x00000000,0xf0f0f0f0,0x0f0f0f0f); +// 
VLOAD_U32(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei32.v v1, (%0), v2, v0.t"::"r"(OUP)); +// MEMBARRIER; +// VEC_EQUAL_U32_RAW(6,OUP,0xdeadbeef,0x00000000,0xdeadbeef,0xdeadbeef); +// } + +void TEST_CASE7(void) { + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t OUP[] = {0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef}; + VLOAD_U64(v1, 0xdeadbeef00000000, 0xdeadbeefffffffff, 0xdeadbeeff0f0f0f0, + 0xdeadbeef0f0f0f0f); + __asm__ volatile("vsuxei64.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U64_RAW(7, OUP, 0xdeadbeef00000000, 0xdeadbeefffffffff, + 0xdeadbeeff0f0f0f0, 0xdeadbeef0f0f0f0f); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_U64(v2,0,8,16,24); +// volatile uint64_t OUP[] = +// {0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef}; +// VLOAD_U64(v1,0xdeadbeef00000000,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeef0f0f0f0f); +// VLOAD_U64(v0,0x6,0x0,0x0,0x0); +// __asm__ volatile("vsuxei64.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_U64_RAW(8,OUP,0xdeadbeefdeadbeef,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeefdeadbeef); +// } + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c new file mode 100644 index 000000000..2f0686722 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c @@ -0,0 +1,137 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 32 + +#define INIT 98 + +void reset_vec8(volatile uint8_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec16(volatile uint16_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec32(volatile uint32_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec64(volatile uint64_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +static volatile uint8_t BUFFER_O8[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint16_t BUFFER_O16[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint32_t BUFFER_O32[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint64_t BUFFER_O64[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; + +// Naive test +void TEST_CASE1(void) { + VSET(12, e8, m1); + VLOAD_8(v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, 0x59, + 0x89); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vsuxei8.v v1, (%0), v2" ::"r"(&BUFFER_O8[0])); + VVCMP_U8(1, BUFFER_O8, INIT, 0xd3, 0x40, 0xd1, 0x84, 0x48, INIT, 0x88, 0x88, + 0xae, INIT, 0x91, 0x02, 0x59, INIT, 0x89); + + VSET(12, e16, m2); + VLOAD_16(v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, 0x11ae, + 0x4891, 0x4902, 0x8759, 0x1989); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm 
volatile("vsuxei16.v v2, (%0), v4" ::"r"(&BUFFER_O16[0])); + VVCMP_U16(2, BUFFER_O16, INIT, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, INIT, + 0x9388, 0x8188, 0x11ae, INIT, 0x4891, 0x4902, 0x8759, INIT, 0x1989); + + VSET(12, e32, m4); + VLOAD_32(v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vsuxei32.v v4, (%0), v8" ::"r"(&BUFFER_O32[0])); + VVCMP_U32(3, BUFFER_O32, INIT, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, INIT, 0x81937598, 0x18747547, 0x3eeeeeee, INIT, + 0xab8b9148, 0x90318509, 0x31897598, INIT, 0x89139848); + + VSET(12, e64, m8); + VLOAD_64(v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vsuxei64.v v8, (%0), v16" ::"r"(&BUFFER_O64[0])); + VVCMP_U64(4, BUFFER_O64, INIT, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, INIT, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, INIT, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, INIT, + 0x8913984898951989); +} + +// Naive test, masked +void TEST_CASE2(void) { + reset_vec8(&BUFFER_O8[0], INIT, 16); + VSET(12, e8, m1); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_8(v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, 0x59, + 0x89); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vsuxei8.v v1, (%0), v2, v0.t" ::"r"(&BUFFER_O8[0])); + VVCMP_U8(5, BUFFER_O8, INIT, INIT, 0x40, INIT, 0x84, INIT, INIT, 0x88, INIT, + 0xae, INIT, INIT, 0x02, INIT, INIT, 0x89); + + reset_vec16(&BUFFER_O16[0], INIT, 16); + VSET(12, e16, m2); + VLOAD_8(v0, 0xAA, 
0x0A); + VLOAD_16(v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, 0x11ae, + 0x4891, 0x4902, 0x8759, 0x1989); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm volatile("vsuxei16.v v2, (%0), v4, v0.t" ::"r"(&BUFFER_O16[0])); + VVCMP_U16(6, BUFFER_O16, INIT, INIT, 0x3840, INIT, 0x9384, INIT, INIT, 0x9388, + INIT, 0x11ae, INIT, INIT, 0x4902, INIT, INIT, 0x1989); + + reset_vec32(&BUFFER_O32[0], INIT, 16); + VSET(12, e32, m4); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_32(v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vsuxei32.v v4, (%0), v8, v0.t" ::"r"(&BUFFER_O32[0])); + VVCMP_U32(7, BUFFER_O32, INIT, INIT, 0xa11a9384, INIT, 0x9fa831c7, INIT, INIT, + 0x81937598, INIT, 0x3eeeeeee, INIT, INIT, 0x90318509, INIT, INIT, + 0x89139848); + + reset_vec64(&BUFFER_O64[0], INIT, 16); + VSET(12, e64, m8); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_64(v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vsuxei64.v v8, (%0), v16, v0.t" ::"r"(&BUFFER_O64[0])); + VVCMP_U64(8, BUFFER_O64, INIT, INIT, 0xa11a9384a7163840, INIT, + 0x9fa831c7a11a9384, INIT, INIT, 0x81937598aa819388, INIT, + 0x3eeeeeeee33111ae, INIT, INIT, 0x9031850931584902, INIT, INIT, + 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c new file mode 100644 index 000000000..fc91e81b8 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c @@ -0,0 +1,102 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; + VLOAD_U8(v1, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("vsxei8.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U8_RAW(1, OUP, 0xff, 0x00, 0xf0, 0x0f); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_8(v2,0,1,2,3); +// volatile int8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; +// VLOAD_8(v1,0xff,0x00,0xf0,0x0f); +// VLOAD_8(v0,12,0,0,0); +// __asm__ volatile("vsxei8.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_8_RAW(2,OUP,0xef,0xef,0xf0,0x0f); +// } + +void TEST_CASE3(void) { + VSET(4, e16, m1); + VLOAD_U16(v2, 0, 2, 4, 6); + volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; + VLOAD_U16(v1, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + __asm__ volatile("vsxei16.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U16_RAW(3, OUP, 0xffff, 0x0000, 0xf0f0, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(4,e16,m1); +// VLOAD_16(v2,0,2,4,6); +// volatile int16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; +// VLOAD_16(v1,0xffff,0x0000,0xf0f0,0x0f0f); +// VLOAD_16(v0,12,0,0,0); +// __asm__ volatile("vsxei16.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_16_RAW(4,OUP,0xdead,0xbeef,0xf0f0,0x0f0f); +// } + +void TEST_CASE5(void) { + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; + VLOAD_U32(v1, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); + __asm__ volatile("vsxei32.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U32_RAW(5, OUP, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_U32(v2,0,4,8,12); +// volatile 
int32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; +// VLOAD_32(v1,0xffffffff,0x00000000,0xf0f0f0f0,0x0f0f0f0f); +// VLOAD_32(v0,12,0,0,0); +// __asm__ volatile("vsxei32.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_32_RAW(6,OUP,0xdeadbeef,0xdeadbeef,0xf0f0f0f0,0x0f0f0f0f); +// } + +void TEST_CASE7(void) { + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t OUP[] = {0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef}; + VLOAD_U64(v1, 0xdeadbeef00000000, 0xdeadbeefffffffff, 0xdeadbeeff0f0f0f0, + 0xdeadbeef0f0f0f0f); + __asm__ volatile("vsxei64.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U64_RAW(7, OUP, 0xdeadbeef00000000, 0xdeadbeefffffffff, + 0xdeadbeeff0f0f0f0, 0xdeadbeef0f0f0f0f); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_64(v2,0,8,16,24); +// volatile int64_t OUP[] = +// {0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef}; +// VLOAD_64(v1,0xdeadbeef00000000,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeef0f0f0f0f); +// VLOAD_64(v0,6,0,0,0); +// __asm__ volatile("vsxei64.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_64_RAW(8,OUP,0xdeadbeefdeadbeef,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeefdeadbeef); +// } + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c new file mode 100644 index 000000000..96777a93c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c @@ -0,0 +1,241 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwadd.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwadd.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwadd.vv v24, v8, v16, v0.t"); + VSET(16, e64, 
m8); + VCMP_U64(6, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwadd.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwadd.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwadd.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + 
VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwadd.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwadd.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwadd.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, 
-16); + asm volatile("vwadd.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, + -11); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwadd.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwadd.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwadd.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c new file mode 
100644 index 000000000..27cc4fa48 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c @@ -0,0 +1,244 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.vv v6, v4, v2, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwaddu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, 
m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwaddu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 6, 259, 8, 257, 10, 255, 12, 253, 14, 251, 16, 249, 18, 247, + 20, 245); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 6, 65539, 8, 65537, 10, 65535, 12, 65533, 14, 65531, 16, + 65529, 18, 65527, 20, 65525); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 6, 4294967299, 8, 4294967297, 10, 4294967295, 12, 4294967293, + 14, 4294967291, 16, 4294967289, 18, 4294967287, 20, 4294967285); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwaddu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 
6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwaddu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwaddu.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 
2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwaddu.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, + -11); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.wx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwaddu.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwaddu.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, 7, 0, 9, 
0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c new file mode 100644 index 000000000..c7adbaf2f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, + 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); + VSET(16, e8, m1); + VLOAD_8(v2, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, + 0x92, 0xb4, 0xc4, 0xdb, 0x1a); + VLOAD_8(v4, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, + 0xba, 0xe2, 0xd7, 0x51, 0x5d); + asm volatile("vwmacc.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x415f, 0xedec, 0x8f2c, 0xa98b, 0xe0d8, 0x9342, 0xedba, + 0x2eb5, 0x631d, 0xebe1, 0x0b1d, 0x7612, 0xe57f, 0x6b81, 0xd83f, + 0xbb2f); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, + 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, + 0x03289e10); + VSET(16, e16, m2); + VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, + 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); + VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, + 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); + asm volatile("vwmacc.vv v12, v4, v8"); + VSET(16, 
e32, m4); + VCMP_I32(2, v12, 0x135705b0, 0x9ee38abf, 0xdb9e53ed, 0x4b21e7d0, 0xe1c1ea3f, + 0x6de47e04, 0x2e306876, 0x31abe8a7, 0x3f93c454, 0xb5b61056, + 0xf98d818c, 0xab235b74, 0x211898ea, 0x2b5e7b64, 0x4f7d7e11, + 0x032f22c3); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, + 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, + 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, + 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, + 0x061e905b827b9818); + VSET(16, e32, m4); + VLOAD_32(v8, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, + 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, + 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, + 0x7fab39a9); + VLOAD_32(v16, 0x376ce1ba, 0x9cc53665, 0x9292669b, 0xcaec0663, 0x174f60ba, + 0x5fc79836, 0x6597295d, 0x737b18f1, 0x8cb86656, 0x044f320e, + 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, + 0xcac2c353); + asm volatile("vwmacc.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x27987c3defb2dc09, 0xc2652748b5903b7c, 0x5fb1b6348769c35c, + 0xc1e76e2cf6217c56, 0x4cc871cf26ba35d4, 0xf0052607e34f7838, + 0x35e364f04a4539f3, 0x1b733cf52ef5b831, 0x5380f57403c23693, + 0x60ee57a5b80c6232, 0x3fc390677f77f3aa, 0xcb708510404efc6d, + 0x37c94aa4ac6b77d5, 0xe2badbd70ab9d815, 0x11ac765b0dd270a4, + 0xeb91935c5ffd04e3); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, + 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); + VSET(16, e8, m1); + VLOAD_8(v2, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, + 0x92, 0xb4, 0xc4, 0xdb, 0x1a); + VLOAD_8(v4, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, + 0xba, 0xe2, 0xd7, 0x51, 0x5d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + 
VCMP_I16(4, v6, 0x460f, 0xedec, 0xa322, 0xa98b, 0xd343, 0x9342, 0xf7a8, + 0x2eb5, 0x3f7f, 0xebe1, 0x0e38, 0x7612, 0xdc97, 0x6b81, 0xe3f4, + 0xbb2f); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, + 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, + 0x03289e10); + VSET(16, e16, m2); + VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, + 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); + VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, + 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0x1d5e4130, 0x9ee38abf, 0xe2c407c1, 0x4b21e7d0, 0xc157159f, + 0x6de47e04, 0x0c385a3e, 0x31abe8a7, 0x484e89df, 0xb5b61056, + 0x17a7a4cf, 0xab235b74, 0x177e67d2, 0x2b5e7b64, 0x5b48691f, + 0x032f22c3); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, + 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, + 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, + 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, + 0x061e905b827b9818); + VSET(16, e32, m4); + VLOAD_32(v8, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, + 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, + 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, + 0x7fab39a9); + VLOAD_32(v16, 0x376ce1ba, 0x9cc53665, 0x9292669b, 0xcaec0663, 0x174f60ba, + 0x5fc79836, 0x6597295d, 0x737b18f1, 0x8cb86656, 0x044f320e, + 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, + 0xcac2c353); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0x37abc1433be408eb, 0xc2652748b5903b7c, 
0x3a99dc46913b03d2, + 0xc1e76e2cf6217c56, 0x456749124aaf479a, 0xf0052607e34f7838, + 0x1e6a624541e03978, 0x1b733cf52ef5b831, 0x552a61c1f7116723, + 0x60ee57a5b80c6232, 0x4c3f1888b5df72b9, 0xcb708510404efc6d, + 0x37cd59f214853904, 0xe2badbd70ab9d815, 0x0f0ff8cee2000142, + 0xeb91935c5ffd04e3); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 0xaa7b, 0x9d27); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, + 0x5e, 0x55, 0x09, 0xdd, 0x23); + asm volatile("vwmacc.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0x196b, 0x9217, 0x8c03, 0x361a, 0xbb6a, 0x377d, 0x8234, + 0xff71, 0xd2f4, 0xd3f6, 0x8806, 0x687c, 0x4fe7, 0xb992, 0xa9cc, + 0x9dd6); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, + 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, + 0x05524f83); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, + 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); + asm volatile("vwmacc.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0xc435e3ec, 0xfff226fa, 0xe6a966ea, 0x3ac40c77, 0x340785f0, + 0xdee56910, 0x5400c5ab, 0xfb080547, 0xd714ba03, 0xc5da1202, + 0x7e63a4c6, 0x1771acb0, 0x037490b3, 0x108f568a, 0x053c7e12, + 0x0073b384); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, + 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, + 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 0x80381e728c1baa95, + 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, + 0xbeca06f79871e7e8); + VSET(16, e32, m4); + scalar = 
6474219; + VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, + 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, + 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, + 0x77402b10); + asm volatile("vwmacc.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0xc3d1296f1ae893e6, 0x5836c95c6dbae113, 0xfd3b05253b5c9368, + 0x2c4d976fb318600e, 0x4a9fe54cf92b4b8d, 0x6d5eb614d7052bf9, + 0x9022bf12bc18cd4f, 0x8902e74ad235ed05, 0x1f86621f3b05e25d, + 0x54be4b3652df41b9, 0xfd1093afbdc79c49, 0x805a304537cce5a8, + 0xa6c03a5756f94905, 0xd3ab25c46d6cd30b, 0x7997bbbadd639479, + 0xbef80b96ee48ff98); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 0xaa7b, 0x9d27); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, + 0x5e, 0x55, 0x09, 0xdd, 0x23); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0x17db, 0x9217, 0x8e1f, 0x361a, 0xbb3d, 0x377d, 0x82cf, + 0xff71, 0xd556, 0xd3f6, 0x85d1, 0x687c, 0x4e3e, 0xb992, 0xaa7b, + 0x9dd6); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, + 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, + 0x05524f83); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, + 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0xc9b9ade8, 0xfff226fa, 0xe1ace4f7, 0x3ac40c77, 0x3ab3025c, + 0xdee56910, 0x538304ce, 0xfb080547, 0xd126fac1, 0xc5da1202, + 0x85ebc0a4, 0x1771acb0, 0x0a2e18cc, 0x108f568a, 
0x03cc9899, + 0x0073b384); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, + 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, + 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 0x80381e728c1baa95, + 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, + 0xbeca06f79871e7e8); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, + 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, + 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, + 0x77402b10); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0xc3afd90f697a742a, 0x5836c95c6dbae113, 0xfd5f5c31e16d95ba, + 0x2c4d976fb318600e, 0x4a793d202092aeac, 0x6d5eb614d7052bf9, + 0x902b8e28be41b10d, 0x8902e74ad235ed05, 0x1f9a7e912f5a51f0, + 0x54be4b3652df41b9, 0xfd260f5f1fc1eb45, 0x805a304537cce5a8, + 0xa6be6d48744a823b, 0xd3ab25c46d6cd30b, 0x7992c128f1c1f6ab, + 0xbef80b96ee48ff98); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c new file mode 100644 index 000000000..c2368776e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); + VSET(16, e8, m1); + VLOAD_8(v2, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, + 0x34, 0xde, 0x96, 0x80, 0xe6); + VLOAD_8(v4, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, + 0x9b, 0xec, 0x5c, 0x8e, 0x77); + asm volatile("vwmaccsu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x25e0, 0x3323, 0x92e6, 0xcd3b, 0x6d37, 0x1615, 0x8e96, + 0x8fd2, 0xdaff, 0x6668, 0x5141, 0x75a1, 0x8665, 0x55cf, 0x3b3c, + 0x539c); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, + 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, + 0xab29da17); + VSET(16, e16, m2); + VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, + 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); + VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 0x1d25, 0x2d70, 0x467a, + 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); + asm volatile("vwmaccsu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xf42c622c, 0x839d2928, 0x6aae411a, 0x9d8c5e88, 0x196a5c5c, + 0x628c33ca, 0xca8f9c1d, 0x075acffb, 0xd58aef57, 0xb0b17e4e, + 0x9aab508a, 0x8f921d88, 0x7cbe902e, 0xbbb98e88, 0x97b93f58, + 0x84411397); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, + 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, + 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, + 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, + 0x3fc18a6aa65ab8d5); + VSET(16, e32, m4); + 
VLOAD_32(v8, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, + 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, + 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, + 0xf54f785a); + VLOAD_32(v16, 0x52d1517d, 0xa592227e, 0xbb122792, 0x531a3046, 0x88193da7, + 0x13db3502, 0x64efb3f9, 0x55c57a21, 0x31cd5a79, 0x5c0b4048, + 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, + 0x8064029f); + asm volatile("vwmaccsu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x00d3fd4343a241fa, 0x7bbc44d6fa22c6ff, 0xe7a7ead9df60a04d, + 0x7ddc4c9a72efd193, 0x2d44ed6d874572c1, 0x93e550a88e8e3197, + 0x8ada040c3cea26be, 0xd76f3f99213ccf47, 0x05f53f01db8f434a, + 0x7bb552f0a51802e9, 0xd49c03ec9aaeb580, 0x0c6e9b2885384c9a, + 0xd60dbf3493400d51, 0xf465e158f7657501, 0x4e90951ee65f361b, + 0x3a651986a4cf2cbb); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); + VSET(16, e8, m1); + VLOAD_8(v2, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, + 0x34, 0xde, 0x96, 0x80, 0xe6); + VLOAD_8(v4, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, + 0x9b, 0xec, 0x5c, 0x8e, 0x77); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0x30dc, 0x3323, 0xd5f6, 0xcd3b, 0x6e79, 0x1615, 0xa05c, + 0x8fd2, 0xd06f, 0x6668, 0x9475, 0x75a1, 0xa5bd, 0x55cf, 0x823c, + 0x539c); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, + 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, + 0xab29da17); + VSET(16, e16, m2); + VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, + 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); + VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 0x1d25, 
0x2d70, 0x467a, + 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0xdbc5b23d, 0x839d2928, 0xb744b5c2, 0x9d8c5e88, 0x237edfc4, + 0x628c33ca, 0xbd3110cd, 0x075acffb, 0x7ade2943, 0xb0b17e4e, + 0x90e5ba77, 0x8f921d88, 0x82d1976e, 0xbbb98e88, 0x989fbb9a, + 0x84411397); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, + 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, + 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, + 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, + 0x3fc18a6aa65ab8d5); + VSET(16, e32, m4); + VLOAD_32(v8, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, + 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, + 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, + 0xf54f785a); + VLOAD_32(v16, 0x52d1517d, 0xa592227e, 0xbb122792, 0x531a3046, 0x88193da7, + 0x13db3502, 0x64efb3f9, 0x55c57a21, 0x31cd5a79, 0x5c0b4048, + 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, + 0x8064029f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0xf8e162af4fefb46a, 0x7bbc44d6fa22c6ff, 0xa7279ec622e749eb, + 0x7ddc4c9a72efd193, 0xf2090d8d3b00e5b8, 0x93e550a88e8e3197, + 0x7cc51e4cc8fd46c6, 0xd76f3f99213ccf47, 0x0d36e747a75534cc, + 0x7bb552f0a51802e9, 0xa7b0f15e7b51c000, 0x0c6e9b2885384c9a, + 0xf385b581a4c1c25b, 0xf465e158f7657501, 0x54ffc96dc442d7b5, + 0x3a651986a4cf2cbb); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, + 0x57, 0x2b, 
0x4f, 0x4e, 0xda); + asm volatile("vwmaccsu.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0xb2c8, 0x2527, 0xbf90, 0xd23a, 0xc954, 0xbdf2, 0x0263, + 0x1096, 0xe38b, 0xe992, 0xb626, 0x5ddf, 0x4e80, 0x8dd3, 0x711a, + 0x6692); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, + 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, + 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, + 0x2f5b537a); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, + 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); + asm volatile("vwmaccsu.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x36616cc1, 0x3c7674b2, 0x7b888e64, 0xc1456153, 0x23999b29, + 0xce7fcb61, 0x465ab99c, 0x4f20386a, 0x1f98c352, 0xd2035436, + 0xa039b88c, 0xc18d7372, 0x6c5c1022, 0xd3b3a4d4, 0x3e1f3e87, + 0x25daceb1); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 0x46a2833ed69dec1d, + 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, + 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, + 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, + 0xc2797417b552325b); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, + 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, + 0xe4fdb1f5, 0x783f5c5d, 0x3514c875, 0xce346d04, 0x68047428, + 0x72ca548f); + asm volatile("vwmaccsu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0xd880804c10498af0, 0xd8f567313109141f, 0xe585463838d91671, + 0x6f2405141ab61224, 0x7847653b1c216e5b, 0x46eb47de4add375f, + 0x03713350939f4492, 0x99ecbb2b8c001a13, 0xd6d2143a6346dfcb, + 0x0e8cb57c0c43cff6, 0x7c093f98e48cef0d, 0x047b2edbc92e1f80, + 0x342f8210ca0537fd, 0xb7ca29b420c298c1, 
0xdce563a087e10ceb, + 0xc2a5c016503018a0); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, + 0x57, 0x2b, 0x4f, 0x4e, 0xda); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0xadd2, 0x2527, 0xbbc6, 0xd23a, 0xc6f7, 0xbdf2, 0xfd9a, + 0x1096, 0xe110, 0xe992, 0xb432, 0x5ddf, 0x4da9, 0x8dd3, 0x6f94, + 0x6692); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, + 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, + 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, + 0x2f5b537a); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, + 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0x39d3ea89, 0x3c7674b2, 0x83edb2d7, 0xc1456153, 0x243c3d4d, + 0xce7fcb61, 0x50a58cbe, 0x4f20386a, 0x2a648b62, 0xd2035436, + 0xa2c6a2e7, 0xc18d7372, 0x7a908fb9, 0xd3b3a4d4, 0x48109ee2, + 0x25daceb1); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 0x46a2833ed69dec1d, + 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, + 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, + 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, + 0xc2797417b552325b); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, + 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, + 0xe4fdb1f5, 0x783f5c5d, 
0x3514c875, 0xce346d04, 0x68047428, + 0x72ca548f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0xd860771ff910e8a1, 0xd8f567313109141f, 0xe55e25348ff4c406, + 0x6f2405141ab61224, 0x78437fc8299017d1, 0x46eb47de4add375f, + 0x0331761dcc2485b7, 0x99ecbb2b8c001a13, 0xd68d230a95510605, + 0x0e8cb57c0c43cff6, 0x7bb0e1dd5f273626, 0x047b2edbc92e1f80, + 0x341b063e01c35796, 0xb7ca29b420c298c1, 0xdcbd3fe115470433, + 0xc2a5c016503018a0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c new file mode 100644 index 000000000..9f0b04ebc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); + VSET(16, e8, m1); + VLOAD_8(v2, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, + 0x75, 0x73, 0x43, 0xd9, 0x43); + VLOAD_8(v4, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, + 0xd1, 0xf0, 0xb9, 0x09, 0x8a); + asm volatile("vwmaccu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 0x92cb, 0xc0d2, 0x6b8e, 0xd047, 0x97b6, 0x6934, 0x4c5d, + 0x65dc, 0x42df, 0x75f1, 0x5298, 0xd979, 0xb13b, 0x32ee, 0x0a56, + 0x8c83); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, + 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, + 0x528c354b); + VSET(16, e16, m2); + VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, + 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); + VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 0x4817, 0x9de1, + 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 0x9382, 0x4369, 0xb1c7, 0x9185); + asm volatile("vwmaccu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 0x45e045d0, 0x20e0ad26, 0x1c968423, 0x6c443b32, 0x37702f57, + 0x4cd91197, 0x702b3765, 0xa43e9cc5, 0xa47055b7, 0x22beaac5, + 0x4dc2ffff, 0xbc81ce52, 0x8e38b3f3, 0x3c4e1738, 0x1e7523fb, + 0xd610159c); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, + 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, + 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, + 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, + 0x40d6eb0605052915); + VSET(16, e32, m4); + 
VLOAD_32(v8, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, + 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, + 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, + 0xd2e4cdbf); + VLOAD_32(v16, 0xab3b5969, 0xe91aa966, 0x336c2f4c, 0xfcc75a99, 0x1854180c, + 0xeec0354b, 0x8b4595bf, 0x9200fb5c, 0x0d627fcf, 0xdf0a8280, + 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, + 0x7e8643a4); + asm volatile("vwmaccu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 0xdcfc2b3ca5c34640, 0x52260af0834ff780, 0x5dd0d00a7e576638, + 0x1f3e3a8021c0226c, 0x37a86e0412f255b1, 0x7a39dcd00fe1b56c, + 0x6b728942b1a24190, 0x283f5f30c90c26b6, 0x7f5820d2b91f8e44, + 0x92f7bdf9f97b71c5, 0x47a4ceb4fab8af14, 0x83757ab9b866ab5b, + 0x6ed5464e986dd540, 0x2ab6573b8294b3ce, 0x8360dddc474a95c0, + 0xa91223c6b56bf471); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); + VSET(16, e8, m1); + VLOAD_8(v2, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, + 0x75, 0x73, 0x43, 0xd9, 0x43); + VLOAD_8(v4, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, + 0xd1, 0xf0, 0xb9, 0x09, 0x8a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0x519d, 0xc0d2, 0x672c, 0xd047, 0x436e, 0x6934, 0x423d, + 0x65dc, 0x3d7b, 0x75f1, 0x1e90, 0xd979, 0x456b, 0x32ee, 0x02b5, + 0x8c83); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, + 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, + 0x528c354b); + VSET(16, e16, m2); + VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, + 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); + VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 
0x4817, 0x9de1, + 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 0x9382, 0x4369, 0xb1c7, 0x9185); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0x1f50b763, 0x20e0ad26, 0x17b8b2b5, 0x6c443b32, 0x25d945cb, + 0x4cd91197, 0x314db8d3, 0xa43e9cc5, 0x74fa2d58, 0x22beaac5, + 0x3cad4e4d, 0xbc81ce52, 0x1c48c2f9, 0x3c4e1738, 0x77de05bf, + 0xd610159c); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, + 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, + 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, + 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, + 0x40d6eb0605052915); + VSET(16, e32, m4); + VLOAD_32(v8, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, + 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, + 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, + 0xd2e4cdbf); + VLOAD_32(v16, 0xab3b5969, 0xe91aa966, 0x336c2f4c, 0xfcc75a99, 0x1854180c, + 0xeec0354b, 0x8b4595bf, 0x9200fb5c, 0x0d627fcf, 0xdf0a8280, + 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, + 0x7e8643a4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0x5118504f9237ea08, 0x52260af0834ff780, 0x4420559f61e6927c, + 0x1f3e3a8021c0226c, 0x289211cb16ebbbc9, 0x7a39dcd00fe1b56c, + 0x159c16af3e71f736, 0x283f5f30c90c26b6, 0x790fab107c1346b6, + 0x92f7bdf9f97b71c5, 0x323623bba87568ce, 0x83757ab9b866ab5b, + 0x6e6b3c19c0d78ca0, 0x2ab6573b8294b3ce, 0x5ff6f4b3610e009c, + 0xa91223c6b56bf471); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, + 0x0a, 0x97, 0x6f, 
0xe0, 0xfc); + asm volatile("vwmaccu.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 0x6f55, 0x06c7, 0x360e, 0x0ff8, 0x7927, 0x61bc, 0x7dd9, + 0x0961, 0x40ef, 0x4b13, 0x44fe, 0x619f, 0x5b37, 0x3ca8, 0x375e, + 0x7cff); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, + 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, + 0x6e479bf4); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, + 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); + asm volatile("vwmaccu.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 0x7484ff07, 0x61bfedc6, 0x6622229d, 0x228430f4, 0x3ceb6b7d, + 0x36bffa37, 0x57ea5f9b, 0x310e703d, 0x2467b43f, 0x806452c5, + 0x29224ac9, 0x1a052d62, 0x796e40f5, 0x7315111e, 0x50c07e83, + 0x74c407b5); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, + 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, + 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, + 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, + 0x1c2ad051e4e7281e); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, + 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, + 0x33c32e03, 0x8be66da2, 0x0fc78147, 0x2ce8d421, 0x9c9bc2fb, + 0x10c8c9f7); + asm volatile("vwmaccu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 0x27b3569317ce3b7b, 0x80301be52297620b, 0x0a12a5e3bda6ead8, + 0x118e4f69e7a94c16, 0x1e95f8d392f37a47, 0x06d20e0c989e38c3, + 0x7974c7d7abd08544, 0x2ff9f5bd60308c44, 0x7d710070e7f94e54, + 0x6c17bb12e5fd05e3, 0x5a1336d961fce8b5, 0x2cc4c4ceefc069b7, + 0x20b79a86595c2d2d, 0x28d03cbe630746bd, 
0x5bd98349d9e5fe73, + 0x1c314a6c44597cdb); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, + 0x0a, 0x97, 0x6f, 0xe0, 0xfc); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0x6f50, 0x06c7, 0x3578, 0x0ff8, 0x752b, 0x61bc, 0x7d0c, + 0x0961, 0x3f2d, 0x4b13, 0x4468, 0x619f, 0x5844, 0x3ca8, 0x32fe, + 0x7cff); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, + 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, + 0x6e479bf4); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, + 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0x681721c9, 0x61bfedc6, 0x5147143e, 0x228430f4, 0x30a43e20, + 0x36bffa37, 0x551df71d, 0x310e703d, 0x12550807, 0x806452c5, + 0x2167ff36, 0x1a052d62, 0x79119a1d, 0x7315111e, 0x3e32c755, + 0x74c407b5); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, + 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, + 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, + 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, + 0x1c2ad051e4e7281e); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, + 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, + 0x33c32e03, 0x8be66da2, 0x0fc78147, 
0x2ce8d421, 0x9c9bc2fb, + 0x10c8c9f7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0x27a0a02f7e9757d4, 0x80301be52297620b, 0x09d25e3173571efd, + 0x118e4f69e7a94c16, 0x1e5ffff32ed851dd, 0x06d20e0c989e38c3, + 0x79598c88fd85995f, 0x2ff9f5bd60308c44, 0x7d65cbfdfc7f2e1d, + 0x6c17bb12e5fd05e3, 0x59ff3d4e018b50f4, 0x2cc4c4ceefc069b7, + 0x20b183b4bb89c200, 0x28d03cbe630746bd, 0x5b9d142326bcef0a, + 0x1c314a6c44597cdb); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c new file mode 100644 index 000000000..0a403bc0b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c @@ -0,0 +1,127 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + 0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, + 0xb7, 0xec, 0x48, 0xb7, 0xe5); + asm volatile("vwmaccus.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x4a1d, 0xd43f, 0xe090, 0xb5a0, 0xf0c8, 0x16cd, 0x304e, + 0x4d5a, 0x89ad, 0x2c7b, 0x11d2, 0xa89f, 0x9f7b, 0x3faa, 0x977a, + 0xe2b4); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + 0xcdfeb4fe, 0x22d24149, 0x26962240, 0x5ef85b7e, 0x2f61a9e8, + 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, + 0x753e0a11); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, + 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); + asm volatile("vwmaccus.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(2, v8, 0xdefd9888, 0x513c4f1b, 0x69d67768, 0x97200c2b, 0xb1c9ea45, + 0xd01da3bc, 0x27b50dad, 0x2187101f, 0x58e9040d, 0x37d63f7f, + 0x3fbefdcf, 0x10d33667, 0x7da856bd, 0x65838754, 0xa31092ec, + 0x74d30370); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + 0x91d74be946352cae, 0x524c6db6c58f9da6, 0x39185a920f7787e8, + 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, + 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, + 0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, + 0x5d716379591922f4); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, + 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, + 0xc0eb37b3, 0x26f06be7, 
0x0e9b21b2, 0x22898a93, 0xe3646841, + 0xdd301fdc); + asm volatile("vwmaccus.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(3, v16, 0xb6a940470425dc20, 0x3c430136a5932551, 0xf096f84fcc847134, + 0x91c26988f191bc3b, 0x5259ad33e6940249, 0x38fb9358fb72e8cf, + 0x405c6235eb4a66a7, 0x9ee5895b4431c96d, 0xff291c64abdfe8d1, + 0x47fbdc7722d7f1e8, 0xd8b3ab8cf55eb11f, 0xc52e86eb9b0cedda, + 0xe7dbf512bdc5c2ab, 0x6fadfdb8b3d16658, 0xd13568f3f48a0453, + 0x5d63f4705f821de8); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + 0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, + 0xb7, 0xec, 0x48, 0xb7, 0xe5); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0x4c8e, 0xd43f, 0xe266, 0xb5a0, 0xf28a, 0x16cd, 0x3111, + 0x4d5a, 0x8787, 0x2c7b, 0x1083, 0xa89f, 0x9fdf, 0x3faa, 0x98e7, + 0xe2b4); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + 0xcdfeb4fe, 0x22d24149, 0x26962240, 0x5ef85b7e, 0x2f61a9e8, + 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, + 0x753e0a11); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, + 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(5, v8, 0xe318cc7a, 0x513c4f1b, 0x6e6fc053, 0x97200c2b, 0xbc3e6244, + 0xd01da3bc, 0x22d24149, 0x2187101f, 0x5ef85b7e, 0x37d63f7f, + 0x373dc202, 0x10d33667, 0x763c5239, 0x65838754, 0xab178102, + 0x74d30370); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + 0x91d74be946352cae, 0x524c6db6c58f9da6, 
0x39185a920f7787e8, + 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, + 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, + 0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, + 0x5d716379591922f4); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, + 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, + 0xc0eb37b3, 0x26f06be7, 0x0e9b21b2, 0x22898a93, 0xe3646841, + 0xdd301fdc); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(6, v16, 0xb6a5b1c3c6d69abb, 0x3c430136a5932551, 0xf0c3eb8821045259, + 0x91c26988f191bc3b, 0x524c6db6c58f9da6, 0x38fb9358fb72e8cf, + 0x4080fbf0fdcc64ec, 0x9ee5895b4431c96d, 0xff0661a19269f0c0, + 0x47fbdc7722d7f1e8, 0xd8cc0342dc3104ce, 0xc52e86eb9b0cedda, + 0xe7d6522aa1c51245, 0x6fadfdb8b3d16658, 0xd140731478a147a8, + 0x5d63f4705f821de8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c new file mode 100644 index 000000000..351d87766 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xee, 0xfe, 0xbd, 0xc2, 0x02, 0xa4, 0x34, 0x33, 0x2b, 0x35, 0x16, + 0x9b, 0x3b, 0x5f, 0xfc, 0x8b); + VLOAD_8(v4, 0xcb, 0x24, 0xe8, 0xb2, 0xeb, 0x24, 0x80, 0x67, 0x43, 0x11, 0x7c, + 0x94, 0x22, 0x71, 0xca, 0x80); + asm volatile("vwmul.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x03ba, 0xffb8, 0x0648, 0x12e4, 0xffd6, 0xf310, 0xe600, + 0x1485, 0x0b41, 0x0385, 0x0aa8, 0x2a9c, 0x07d6, 0x29ef, 0x00d8, + 0x3a80); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8aed, 0x2153, 0x5377, 0xc19c, 0x1051, 0x1b75, 0xbafd, 0xb200, + 0xb209, 0xa9a2, 0xbdc4, 0x1653, 0x5965, 0x145e, 0xb626, 0xd79c); + VLOAD_16(v8, 0x778d, 0xc104, 0x6eac, 0x78e8, 0xacd2, 0x698b, 0xc7d3, 0x1e29, + 0x0a58, 0x58b5, 0x29f9, 0x2fb0, 0x2166, 0x0ac4, 0x44e5, 0xbc40); + asm volatile("vwmul.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xc953af89, 0xf7cd184c, 0x241535f4, 0xe2889560, 0xfab2ce72, + 0x0b51e587, 0x0f24c987, 0xf6cf8200, 0xfcd98d18, 0xe2129f8a, + 0xf523f7a4, 0x04289610, 0x0ba9a33e, 0x00db43f8, 0xec2007fe, + 0x0ab07700); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbbc467cb, 0xfbb3efda, 0x652f8490, 0x5e7ea848, 0x21fbc400, + 0xbb409fde, 0x98413836, 0x14652ba4, 0xc3d3c86f, 0xc84d3ae3, + 0x3df53027, 0xbda29a2c, 0xa1d7d949, 0x60a3d06e, 0xa91e405d, + 0x7eea498f); + VLOAD_32(v16, 0x80407791, 0x3e51b6e0, 0xd0be8bc1, 0x683f33bd, 0xeddda6c8, + 0x34e351f2, 0xa6a93ab2, 0xc8893cb8, 0xcb61ddc1, 0x341a4cdc, + 0xd377dc52, 0x2f3f3dbf, 0xa97f8c35, 0x38a44020, 0x28e1cc8b, + 0x52d0e17a); + asm volatile("vwmul.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x220c9d56193e26fb, 0xfef434f3d9f0dac0, 0xed5267a678ad2090, + 0x267ac8a1a8c09528, 0xfd97bcf001c92000, 0xf1cc14c54f865ddc, + 0x24347d81c3bb518c, 0xfb94cd6ed9b5cde0, 0x0c5e26a06dc0eeaf, + 0xf4a9f71526e7ff14, 0xf538e8d5150bf07e, 0xf3c075503fe182d4, + 
0x1fd0bbab88bae81d, 0x1561d6be9f0d8dc0, 0xf220183a08740e7f, + 0x290e99b3f87dbd26); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xee, 0xfe, 0xbd, 0xc2, 0x02, 0xa4, 0x34, 0x33, 0x2b, 0x35, 0x16, + 0x9b, 0x3b, 0x5f, 0xfc, 0x8b); + VLOAD_8(v4, 0xcb, 0x24, 0xe8, 0xb2, 0xeb, 0x24, 0x80, 0x67, 0x43, 0x11, 0x7c, + 0x94, 0x22, 0x71, 0xca, 0x80); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmul.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0, 0xffb8, 0, 0x12e4, 0, 0xf310, 0, 0x1485, 0, 0x0385, 0, + 0x2a9c, 0, 0x29ef, 0, 0x3a80); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8aed, 0x2153, 0x5377, 0xc19c, 0x1051, 0x1b75, 0xbafd, 0xb200, + 0xb209, 0xa9a2, 0xbdc4, 0x1653, 0x5965, 0x145e, 0xb626, 0xd79c); + VLOAD_16(v8, 0x778d, 0xc104, 0x6eac, 0x78e8, 0xacd2, 0x698b, 0xc7d3, 0x1e29, + 0x0a58, 0x58b5, 0x29f9, 0x2fb0, 0x2166, 0x0ac4, 0x44e5, 0xbc40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmul.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0, 0xf7cd184c, 0, 0xe2889560, 0, 0x0b51e587, 0, 0xf6cf8200, + 0, 0xe2129f8a, 0, 0x04289610, 0, 0x00db43f8, 0, 0x0ab07700); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbbc467cb, 0xfbb3efda, 0x652f8490, 0x5e7ea848, 0x21fbc400, + 0xbb409fde, 0x98413836, 0x14652ba4, 0xc3d3c86f, 0xc84d3ae3, + 0x3df53027, 0xbda29a2c, 0xa1d7d949, 0x60a3d06e, 0xa91e405d, + 0x7eea498f); + VLOAD_32(v16, 0x80407791, 0x3e51b6e0, 0xd0be8bc1, 0x683f33bd, 0xeddda6c8, + 0x34e351f2, 0xa6a93ab2, 0xc8893cb8, 0xcb61ddc1, 0x341a4cdc, + 0xd377dc52, 0x2f3f3dbf, 0xa97f8c35, 0x38a44020, 0x28e1cc8b, + 0x52d0e17a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmul.vv v24, v16, v8, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0, 0xfef434f3d9f0dac0, 0, 0x267ac8a1a8c09528, 0, + 0xf1cc14c54f865ddc, 0, 0xfb94cd6ed9b5cde0, 0, 0xf4a9f71526e7ff14, 0, + 0xf3c075503fe182d4, 0, 0x1561d6be9f0d8dc0, 0, 0x290e99b3f87dbd26); +}; + +void TEST_CASE3(void) { + VSET(16, 
e8, m1); + VLOAD_8(v2, 0x86, 0x79, 0xa0, 0x8a, 0x3e, 0xc3, 0x3e, 0x0c, 0x1b, 0xca, 0x80, + 0x41, 0x0e, 0xee, 0x94, 0xdf); + int64_t scalar = 5; + asm volatile("vwmul.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0xfd9e, 0x025d, 0xfe20, 0xfdb2, 0x0136, 0xfecf, 0x0136, + 0x003c, 0x0087, 0xfef2, 0xfd80, 0x0145, 0x0046, 0xffa6, 0xfde4, + 0xff5b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb0ab, 0xcccb, 0x5fad, 0x9e24, 0x1496, 0xd4a0, 0x2552, 0xcef6, + 0x34b8, 0xef22, 0x69c3, 0xbb05, 0xbe72, 0x315b, 0x3f03, 0xf58b); + scalar = -5383; + asm volatile("vwmul.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x06842453, 0x0434bf73, 0xf8243145, 0x0809b904, 0xfe4f21e6, + 0x03900fa0, 0xfcef40c2, 0x04072946, 0xfbab76f8, 0x0162ac12, + 0xf7501cab, 0x05aa79dd, 0x056270e2, 0xfbf22f83, 0xfad307eb, + 0x00dbe233); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x71c6753d, 0x66646cc2, 0x23065c23, 0xde594cad, 0xa2f87c53, + 0xaebb2bcb, 0xc53688b8, 0xf0c161dd, 0x2d856780, 0xa520cce5, + 0x677c5e13, 0x83d288f4, 0x78b6acdc, 0x5b635dd1, 0x97dc75c8, + 0x1a1aa6d4); + scalar = 6474219; + asm volatile("vwmul.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0x002be7b6249483ff, 0x0027833479d82816, 0x000d840f6a940f21, + 0xfff303a4936737cf, 0xffdc19bf4b7d4b31, 0xffe0a38d1ee99659, + 0xffe9508230d7f8e8, 0xfffa1e014df55adf, 0x001190f926b98280, + 0xffdceee1c5a5e337, 0x0027ef3ba84d4671, 0xffd014a6bd334bfc, + 0x002e952469a169f4, 0x0023441ed2e237db, 0xffd7d041966a2698, + 0x000a12cac09b989c); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x86, 0x79, 0xa0, 0x8a, 0x3e, 0xc3, 0x3e, 0x0c, 0x1b, 0xca, 0x80, + 0x41, 0x0e, 0xee, 0x94, 0xdf); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmul.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0, 0x025d, 0, 0xfdb2, 0, 0xfecf, 0, 0x003c, 0, 0xfef2, 0, + 0x0145, 0, 0xffa6, 0, 0xff5b); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0xb0ab, 0xcccb, 0x5fad, 0x9e24, 0x1496, 0xd4a0, 0x2552, 0xcef6, + 0x34b8, 0xef22, 0x69c3, 0xbb05, 0xbe72, 0x315b, 0x3f03, 0xf58b); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmul.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0, 0x0434bf73, 0, 0x0809b904, 0, 0x03900fa0, 0, 0x04072946, + 0, 0x0162ac12, 0, 0x05aa79dd, 0, 0xfbf22f83, 0, 0x00dbe233); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x71c6753d, 0x66646cc2, 0x23065c23, 0xde594cad, 0xa2f87c53, + 0xaebb2bcb, 0xc53688b8, 0xf0c161dd, 0x2d856780, 0xa520cce5, + 0x677c5e13, 0x83d288f4, 0x78b6acdc, 0x5b635dd1, 0x97dc75c8, + 0x1a1aa6d4); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmul.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0, 0x0027833479d82816, 0, 0xfff303a4936737cf, 0, + 0xffe0a38d1ee99659, 0, 0xfffa1e014df55adf, 0, 0xffdceee1c5a5e337, 0, + 0xffd014a6bd334bfc, 0, 0x0023441ed2e237db, 0, 0x000a12cac09b989c); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c new file mode 100644 index 000000000..3831748d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x37, 0x4e, 0x9a, 0x08, 0x12, 0xfd, 0xa4, 0x21, 0x44, 0x58, 0x5a, + 0xa9, 0x1d, 0x5e, 0xd4, 0x8e); + VLOAD_8(v4, 0x60, 0x5b, 0x0e, 0x78, 0x67, 0xf4, 0xd3, 0x0f, 0x75, 0x34, 0xc3, + 0xb1, 0x62, 0x42, 0xa9, 0x75); + asm volatile("vwmulsu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x14a0, 0x1bba, 0xfa6c, 0x03c0, 0x073e, 0xfd24, 0xb42c, + 0x01ef, 0x1f14, 0x11e0, 0x448e, 0xc3d9, 0x0b1a, 0x183c, 0xe2f4, + 0xcbe6); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xba33, 0x3a22, 0x9f52, 0x0c6a, 0xcb67, 0x790c, 0x1a85, 0x958e, + 0xe967, 0x52b6, 0xa453, 0xe306, 0x3c91, 0x0309, 0xcbad, 0x9b78); + VLOAD_16(v8, 0x84ef, 0xf522, 0x6224, 0x6e02, 0xeedb, 0x5a1f, 0x98d7, 0xa498, + 0x7a66, 0xdc69, 0xd88b, 0xa611, 0x5a08, 0x6836, 0x9130, 0x85be); + asm volatile("vwmulsu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xdbc1219d, 0x37aa4284, 0xdaefcb88, 0x0555a4d4, 0xceecc31d, + 0x2a9ce074, 0x0fd53db3, 0xbb8fc450, 0xf532150a, 0x473654a6, + 0xb2744111, 0xed33f766, 0x154cde88, 0x013c4be6, 0xe2532d70, + 0xcb7abb10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bc45f0a, 0x8a60bc63, 0xf3fcddb9, 0xa810b1c8, 0x8cb59934, + 0x8d3334b0, 0x4387cb58, 0xc59d7a46, 0x939dd006, 0xfbd1dfc1, + 0x75307321, 0xb46b5a27, 0xfd2fbdda, 0xec141137, 0x3a1bc8a7, + 0xf0b1eb21); + VLOAD_32(v16, 0xe193ae9a, 0x57eccb97, 0x41d9eeff, 0xe8d58ddd, 0x2057ccc2, + 0x3122b84c, 0x003bb317, 0xcdfc6918, 0xb5883636, 0x52788c1f, + 0xab90d0a1, 0x6d890387, 0xf9bf35e7, 0xf88259a4, 0x79d9e6f0, + 0x8203a804); + asm volatile("vwmulsu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x99948a03e977f804, 0xd79a13a8a4b59f65, 0xfce8fa6d09d3d947, + 0xb005c64673bca1a8, 0xf16f28a6e85f8968, 0xe9f73b3d68722440, + 0x000fbf89e1a9cce8, 0xd10589f688d22c90, 0xb324e8f72e5b2544, + 0xfea741c2f93fa45f, 0x4e89a7a8de9337c1, 0xdfa941cb4089ff91, + 
0xfd4155b0080871b6, 0xeca94be43cc5263c, 0x1ba897cf74e12690, + 0xf83a217108785484); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x37, 0x4e, 0x9a, 0x08, 0x12, 0xfd, 0xa4, 0x21, 0x44, 0x58, 0x5a, + 0xa9, 0x1d, 0x5e, 0xd4, 0x8e); + VLOAD_8(v4, 0x60, 0x5b, 0x0e, 0x78, 0x67, 0xf4, 0xd3, 0x0f, 0x75, 0x34, 0xc3, + 0xb1, 0x62, 0x42, 0xa9, 0x75); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulsu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0, 0x1bba, 0, 0x03c0, 0, 0xfd24, 0, 0x01ef, 0, 0x11e0, 0, + 0xc3d9, 0, 0x183c, 0, 0xcbe6); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xba33, 0x3a22, 0x9f52, 0x0c6a, 0xcb67, 0x790c, 0x1a85, 0x958e, + 0xe967, 0x52b6, 0xa453, 0xe306, 0x3c91, 0x0309, 0xcbad, 0x9b78); + VLOAD_16(v8, 0x84ef, 0xf522, 0x6224, 0x6e02, 0xeedb, 0x5a1f, 0x98d7, 0xa498, + 0x7a66, 0xdc69, 0xd88b, 0xa611, 0x5a08, 0x6836, 0x9130, 0x85be); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmulsu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0, 0x37aa4284, 0, 0x0555a4d4, 0, 0x2a9ce074, 0, 0xbb8fc450, + 0, 0x473654a6, 0, 0xed33f766, 0, 0x013c4be6, 0, 0xcb7abb10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bc45f0a, 0x8a60bc63, 0xf3fcddb9, 0xa810b1c8, 0x8cb59934, + 0x8d3334b0, 0x4387cb58, 0xc59d7a46, 0x939dd006, 0xfbd1dfc1, + 0x75307321, 0xb46b5a27, 0xfd2fbdda, 0xec141137, 0x3a1bc8a7, + 0xf0b1eb21); + VLOAD_32(v16, 0xe193ae9a, 0x57eccb97, 0x41d9eeff, 0xe8d58ddd, 0x2057ccc2, + 0x3122b84c, 0x003bb317, 0xcdfc6918, 0xb5883636, 0x52788c1f, + 0xab90d0a1, 0x6d890387, 0xf9bf35e7, 0xf88259a4, 0x79d9e6f0, + 0x8203a804); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmulsu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0, 0xd79a13a8a4b59f65, 0, 0xb005c64673bca1a8, 0, + 0xe9f73b3d68722440, 0, 0xd10589f688d22c90, 0, 0xfea741c2f93fa45f, 0, + 0xdfa941cb4089ff91, 0, 0xeca94be43cc5263c, 0, 0xf83a217108785484); +}; + +void TEST_CASE3(void) { + 
VSET(16, e8, m1); + VLOAD_8(v2, 0x16, 0x39, 0x0d, 0xcb, 0x02, 0x2b, 0xcd, 0x30, 0xec, 0x03, 0x18, + 0x78, 0xec, 0xba, 0xf8, 0x49); + uint64_t scalar = 5; + asm volatile("vwmulsu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0x006e, 0x011d, 0x0041, 0xfef7, 0x000a, 0x00d7, 0xff01, + 0x00f0, 0xff9c, 0x000f, 0x0078, 0x0258, 0xff9c, 0xfea2, 0xffd8, + 0x016d); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x602f, 0x1b08, 0xfdd6, 0x7e53, 0x59f7, 0x70f1, 0x8a33, 0x5d93, + 0x02a3, 0x9f70, 0x3919, 0x8f2b, 0xc9d3, 0x1b65, 0x15bd, 0xf8be); + scalar = 5383; + asm volatile("vwmulsu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x07e67c49, 0x02386538, 0xffd27eda, 0x0a604345, 0x0763b8c1, + 0x0946db97, 0xf652f665, 0x07af9e05, 0x00377175, 0xf8118c10, + 0x04b09caf, 0xf6bb712d, 0xfb8cd3c5, 0x024008c3, 0x01c9192b, + 0xff676332); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x77036e08, 0xeb7bd8dc, 0x9c0d7f6a, 0x19d320f6, 0xb0c0d792, + 0x3f02203c, 0xf72a9ea9, 0x392f2986, 0x85b78fe8, 0xdc6a281b, + 0x146ffa52, 0x61f96c3c, 0x876cda10, 0x24d22032, 0xb6ffba3d, + 0xbbd29543); + scalar = 6474219; + asm volatile("vwmulsu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0x002ded2f109a4958, 0xfff8153a6c2ccdf4, 0xffd96e55a002304e, + 0x0009f73117dc67d2, 0xffe16b51302b8506, 0x00185082d93ab314, + 0xfffc975917645623, 0x001611286b315402, 0xffd0cfd1f64e41f8, + 0xfff24492094603c9, 0x0007e2fc81b92b46, 0x0025cec234e97714, + 0xffd178914242bcb0, 0x000e357b1b4ecfe6, 0xffe3d4511153daff, + 0xffe5b0d6e5269f81); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x16, 0x39, 0x0d, 0xcb, 0x02, 0x2b, 0xcd, 0x30, 0xec, 0x03, 0x18, + 0x78, 0xec, 0xba, 0xf8, 0x49); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulsu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0, 0x011d, 0, 0xfef7, 0, 0x00d7, 0, 0x00f0, 0, 0x000f, 0, + 0x0258, 0, 0xfea2, 0, 
0x016d); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x602f, 0x1b08, 0xfdd6, 0x7e53, 0x59f7, 0x70f1, 0x8a33, 0x5d93, + 0x02a3, 0x9f70, 0x3919, 0x8f2b, 0xc9d3, 0x1b65, 0x15bd, 0xf8be); + scalar = 5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmulsu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0, 0x02386538, 0, 0x0a604345, 0, 0x0946db97, 0, 0x07af9e05, + 0, 0xf8118c10, 0, 0xf6bb712d, 0, 0x024008c3, 0, 0xff676332); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x77036e08, 0xeb7bd8dc, 0x9c0d7f6a, 0x19d320f6, 0xb0c0d792, + 0x3f02203c, 0xf72a9ea9, 0x392f2986, 0x85b78fe8, 0xdc6a281b, + 0x146ffa52, 0x61f96c3c, 0x876cda10, 0x24d22032, 0xb6ffba3d, + 0xbbd29543); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmulsu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0, 0xfff8153a6c2ccdf4, 0, 0x0009f73117dc67d2, 0, + 0x00185082d93ab314, 0, 0x001611286b315402, 0, 0xfff24492094603c9, 0, + 0x0025cec234e97714, 0, 0x000e357b1b4ecfe6, 0, 0xffe5b0d6e5269f81); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c new file mode 100644 index 000000000..67ea9375f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x74, 0xfb, 0xf4, 0xe9, 0xe5, 0x4e, 0x02, 0x27, 0xe9, 0x83, 0xfe, + 0x03, 0xb2, 0xb9, 0x9a, 0x71); + VLOAD_8(v4, 0x67, 0xa9, 0x07, 0x0f, 0xe3, 0x0d, 0xce, 0x81, 0xa2, 0xa5, 0x59, + 0x18, 0x0d, 0xac, 0x80, 0x31); + asm volatile("vwmulu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 0x2eac, 0xa5b3, 0x06ac, 0x0da7, 0xcb0f, 0x03f6, 0x019c, + 0x13a7, 0x9372, 0x546f, 0x584e, 0x0048, 0x090a, 0x7c4c, 0x4d00, + 0x15a1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf44, 0x249f, 0x3b1d, 0xea59, 0x0c47, 0xd24b, 0xce3e, 0xdb61, + 0x3506, 0xcee2, 0x3c7e, 0xc169, 0x05fd, 0x7fe6, 0xf7db, 0xb7cd); + VLOAD_16(v8, 0xaa0b, 0x2176, 0x34bc, 0x4aa6, 0x221e, 0x9f98, 0x63f5, 0x8da7, + 0x001d, 0x18d7, 0x1dbb, 0x5f2d, 0x0783, 0xd756, 0xa08d, 0x9c49); + asm volatile("vwmulu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 0x89ac0fec, 0x04c9604a, 0x0c2d4d4c, 0x4455afb6, 0x01a2de52, + 0x83197188, 0x50875b56, 0x79638947, 0x000601ae, 0x1412efce, + 0x0706760a, 0x47e7f675, 0x002cfb77, 0x6b952144, 0x9b71639f, + 0x70355575); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd6f59ab7, 0x3b760112, 0x185928a9, 0x344f2f98, 0x07084e45, + 0x0af492c5, 0x5de6f51a, 0x76783522, 0x36835490, 0x043d016f, + 0xf583b765, 0xd8796652, 0x1bd09e8f, 0xeecf0026, 0xdb725a7d, + 0x3a4c3ab3); + VLOAD_32(v16, 0x19f9f18b, 0x801fde9f, 0xaf759e4c, 0x9206cfd4, 0x2dc70e82, + 0xb57cb666, 0xc4ab14ac, 0xbf231e21, 0xdc6caaf4, 0x5bbc4031, + 0x2021a0db, 0x4e68ad25, 0xb090da86, 0xe32bd2c2, 0xf45e06d0, + 0xa5320284); + asm volatile("vwmulu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 0x15cfdbd14900485d, 0x1dc267886466462e, 0x10b022995dcd602c, + 0x1dd68d77269f51e0, 0x0141ed9cff22850a, 0x07c4420a9e36887e, + 0x48239482c2b0b578, 0x5873f004fd5ed562, 0x2ef0007a98143940, + 0x0184cd5f928d063f, 0x1ed0b72b9e520367, 0x424d80e7e10133da, + 
0x132f2a19b6a8c4da, 0xd3ea6e817f5f48cc, 0xd179981358ee7390, + 0x259e854b7db9aa4c); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x74, 0xfb, 0xf4, 0xe9, 0xe5, 0x4e, 0x02, 0x27, 0xe9, 0x83, 0xfe, + 0x03, 0xb2, 0xb9, 0x9a, 0x71); + VLOAD_8(v4, 0x67, 0xa9, 0x07, 0x0f, 0xe3, 0x0d, 0xce, 0x81, 0xa2, 0xa5, 0x59, + 0x18, 0x0d, 0xac, 0x80, 0x31); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 0xa5b3, 0, 0x0da7, 0, 0x03f6, 0, 0x13a7, 0, 0x546f, 0, + 0x0048, 0, 0x7c4c, 0, 0x15a1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf44, 0x249f, 0x3b1d, 0xea59, 0x0c47, 0xd24b, 0xce3e, 0xdb61, + 0x3506, 0xcee2, 0x3c7e, 0xc169, 0x05fd, 0x7fe6, 0xf7db, 0xb7cd); + VLOAD_16(v8, 0xaa0b, 0x2176, 0x34bc, 0x4aa6, 0x221e, 0x9f98, 0x63f5, 0x8da7, + 0x001d, 0x18d7, 0x1dbb, 0x5f2d, 0x0783, 0xd756, 0xa08d, 0x9c49); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmulu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 0x04c9604a, 0, 0x4455afb6, 0, 0x83197188, 0, 0x79638947, + 0, 0x1412efce, 0, 0x47e7f675, 0, 0x6b952144, 0, 0x70355575); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd6f59ab7, 0x3b760112, 0x185928a9, 0x344f2f98, 0x07084e45, + 0x0af492c5, 0x5de6f51a, 0x76783522, 0x36835490, 0x043d016f, + 0xf583b765, 0xd8796652, 0x1bd09e8f, 0xeecf0026, 0xdb725a7d, + 0x3a4c3ab3); + VLOAD_32(v16, 0x19f9f18b, 0x801fde9f, 0xaf759e4c, 0x9206cfd4, 0x2dc70e82, + 0xb57cb666, 0xc4ab14ac, 0xbf231e21, 0xdc6caaf4, 0x5bbc4031, + 0x2021a0db, 0x4e68ad25, 0xb090da86, 0xe32bd2c2, 0xf45e06d0, + 0xa5320284); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmulu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, 0x1dc267886466462e, 0, 0x1dd68d77269f51e0, 0, + 0x07c4420a9e36887e, 0, 0x5873f004fd5ed562, 0, 0x0184cd5f928d063f, 0, + 0x424d80e7e10133da, 0, 0xd3ea6e817f5f48cc, 0, 0x259e854b7db9aa4c); +}; + +void TEST_CASE3(void) { + 
VSET(16, e8, m1); + VLOAD_8(v2, 0xf8, 0x11, 0x12, 0xf1, 0x63, 0x21, 0x88, 0x3b, 0x01, 0xf5, 0x6d, + 0xf5, 0xb1, 0x54, 0xcd, 0xb0); + uint64_t scalar = 5; + asm volatile("vwmulu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 0x04d8, 0x0055, 0x005a, 0x04b5, 0x01ef, 0x00a5, 0x02a8, + 0x0127, 0x0005, 0x04c9, 0x0221, 0x04c9, 0x0375, 0x01a4, 0x0401, + 0x0370); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf8e5, 0x6a23, 0xb52f, 0x8838, 0xb6d4, 0x5279, 0xf80e, 0xa450, + 0x13ec, 0x916f, 0x8edd, 0x0162, 0x9350, 0x9f74, 0xe1e7, 0x2719); + scalar = 5383; + asm volatile("vwmulu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 0x14719743, 0x08b7c5f5, 0x0ee1cf49, 0x0b305188, 0x0f0463cc, + 0x06c62e4f, 0x145fee62, 0x0d7f0e30, 0x01a2e774, 0x0bf21509, + 0x0bbc090b, 0x001d13ae, 0x0c199730, 0x0d18e02c, 0x128e2051, + 0x03361eaf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc41bf344, 0xad5aef4c, 0xf2b11789, 0xeb0d7526, 0xd6c67427, + 0x73724130, 0x440f954a, 0x0661455f, 0x450070ca, 0xc258c90c, + 0xf095d838, 0x358b0916, 0x6e1f1918, 0x4ebf2685, 0x3805d683, + 0x73715886); + scalar = 6474219; + asm volatile("vwmulu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 0x004bad6117b0b36c, 0x0042e58b237456c4, 0x005da736d67d2bc3, + 0x005ab48c2ee15fe2, 0x0052e15f704d3ecd, 0x002c8ccba3708710, + 0x001a43a5dcd924ee, 0x00027644c9204535, 0x001aa097dd4a236e, + 0x004aff4713f2fa04, 0x005cd71f45c17368, 0x0014a974cb2f9d32, + 0x002a7ec31c6fe108, 0x001e63491da0c917, 0x00159e6c20eec541, + 0x002c8c71dad97902); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xf8, 0x11, 0x12, 0xf1, 0x63, 0x21, 0x88, 0x3b, 0x01, 0xf5, 0x6d, + 0xf5, 0xb1, 0x54, 0xcd, 0xb0); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, 0x0055, 0, 0x04b5, 0, 0x00a5, 0, 0x0127, 0, 0x04c9, 0, + 0x04c9, 0, 0x01a4, 0, 0x0370); + 
+ VSET(16, e16, m2); + VLOAD_16(v4, 0xf8e5, 0x6a23, 0xb52f, 0x8838, 0xb6d4, 0x5279, 0xf80e, 0xa450, + 0x13ec, 0x916f, 0x8edd, 0x0162, 0x9350, 0x9f74, 0xe1e7, 0x2719); + scalar = 5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmulu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 0x08b7c5f5, 0, 0x0b305188, 0, 0x06c62e4f, 0, 0x0d7f0e30, + 0, 0x0bf21509, 0, 0x001d13ae, 0, 0x0d18e02c, 0, 0x03361eaf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc41bf344, 0xad5aef4c, 0xf2b11789, 0xeb0d7526, 0xd6c67427, + 0x73724130, 0x440f954a, 0x0661455f, 0x450070ca, 0xc258c90c, + 0xf095d838, 0x358b0916, 0x6e1f1918, 0x4ebf2685, 0x3805d683, + 0x73715886); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmulu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 0x0042e58b237456c4, 0, 0x005ab48c2ee15fe2, 0, + 0x002c8ccba3708710, 0, 0x00027644c9204535, 0, 0x004aff4713f2fa04, 0, + 0x0014a974cb2f9d32, 0, 0x001e63491da0c917, 0, 0x002c8c71dad97902); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c new file mode 100644 index 000000000..0c57215a1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c @@ -0,0 +1,153 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test: unmasked vwredsum.vs over 16 elements at SEW = 8, 16 and 32 +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(1, v4, 327); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(2, v8, 73); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + asm volatile("vwredsum.vs v16, v24, v8"); + VCMP_U64(3, v16, 73); +} + +// Masked naive test: same reductions under v0.t, with v0 = {0xaa, 0x55} +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + VLOAD_16(v4, 1); + asm volatile("vwredsum.vs v4, v6, v2, v0.t"); + VCMP_U16(4, v4, 291); + + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + VLOAD_32(v8, 1); + asm volatile("vwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(5, v8, 37); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + VLOAD_64(v16, 1); + asm volatile("vwredsum.vs v16, v24, v8, v0.t"); + VCMP_U64(6, v16, 37); +} + +// Are we respecting the undisturbed tail policy?
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(7, v4, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(8, v8, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v16, v24, v8"); + VCMP_U64(9, v16, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements (vl = 15, 1, 3), undisturbed policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(10, v4, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(11, v8, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v16, v24, v8"); +
VCMP_U64(12, v16, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements, undisturbed policy, and mask (vl = 3 case disabled) +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2, v0.t"); + VCMP_U16(13, v4, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(14, v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + // VSET(3, e32, m4); + // VLOAD_8(v0, 0xaa, 0x55); + // VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // asm volatile("vwredsum.vs v16, v24, v8, v0.t"); + // VCMP_U64(15, v16, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Test difference from vwredsumu: the source byte 255 is sign-extended to -1 +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 255, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(16, v4, 325); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c new file mode 100644 index 000000000..421acdb6f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c @@ -0,0 +1,153 @@ +// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test: unmasked vwredsumu.vs over 16 elements at SEW = 8, 16 and 32 +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(1, v4, 327); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(2, v8, 73); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + asm volatile("vwredsumu.vs v16, v24, v8"); + VCMP_U64(3, v16, 73); +} + +// Masked naive test: same reductions under v0.t, with v0 = {0xaa, 0x55} +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + VLOAD_16(v4, 1); + asm volatile("vwredsumu.vs v4, v6, v2, v0.t"); + VCMP_U16(4, v4, 291); + + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + VLOAD_32(v8, 1); + asm volatile("vwredsumu.vs v8, v12, v4, v0.t"); + VCMP_U32(5, v8, 37); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + VLOAD_64(v16, 1); + asm volatile("vwredsumu.vs v16, v24, v8, v0.t"); + VCMP_U64(6, v16, 37); +} + +// Are we respecting the undisturbed tail policy?
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(7, v4, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(8, v8, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8"); + VCMP_U64(9, v16, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements (vl = 15, 1, 3), undisturbed policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(10, v4, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(11, v8, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8"); +
VCMP_U64(12, v16, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements (vl = 15, 1, 3), undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2, v0.t"); + VCMP_U16(13, v4, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4, v0.t"); + VCMP_U32(14, v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8, v0.t"); + VCMP_U64(15, v16, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Test difference from vwredsum: the source byte 255 is zero-extended to 255 +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 255, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(16, v4, 581); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c new file mode 100644 index 000000000..200d28a4b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c @@ -0,0 +1,246 @@ +// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* vwsub.vv, unmasked, SEW = 8/16/32 */ + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE2(void) { /* vwsub.vv under v0.t, destination VCLEARed first */ + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsub.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsub.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); +
VCLEAR(v28); + asm volatile("vwsub.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE3(void) { /* vwsub.vx, unmasked, scalar = 5 */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE4(void) { /* vwsub.vx under v0.t, destination VCLEARed first */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +void
TEST_CASE5(void) { /* vwsub.wv, unmasked; first source loaded at 2*SEW */ + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE6(void) { /* vwsub.wv under v0.t, destination VCLEARed first */ + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsub.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsub.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsub.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE7(void) { /* vwsub.wx, unmasked, scalar = 5 */ + const uint32_t
scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE8(void) { /* vwsub.wx under v0.t, destination VCLEARed first */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsub.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsub.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsub.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c new file mode 100644 index 000000000..12e6dc22c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c @@ -0,0 +1,246 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsubu.vv v12, v4, v8, 
v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsubu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, -4, 249, -2, 247, 0, 245, 2, 243, 4, 241, 6, 239, 8, 237, 10, + 235); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, -4, 65529, -2, 65527, 0, 65525, 2, 65523, 4, 65521, 6, 65519, + 8, 65517, 10, 65515); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, -4, 4294967289, -2, 4294967287, 0, 4294967285, 2, 4294967283, + 4, 4294967281, 6, 4294967279, 8, 4294967277, 10, 4294967275); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsubu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, 
v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsubu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsubu.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, 
m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsubu.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsubu.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsubu.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + 
VCLEAR(v20); + asm volatile("vwsubu.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c new file mode 100644 index 000000000..0c7574874 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c @@ -0,0 +1,309 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + asm volatile("vxor.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, + 0x02, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + asm volatile("vxor.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000, 0x00ff, + 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + asm 
volatile("vxor.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, + 0x00000000, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, + 0x00000002, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000); +} + +void TEST_CASE2() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + 0xef, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v8, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vv v2, v4, v8, v0.t"); + VCMP_U16(6, v2, 0x00ff, 0xbeef, 
0x0000, 0x00ff, 0xbeef, 0x0000, 0x00ff, + 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, + 0x00000000, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, + 0xdeadbeef, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000); +} + +void 
TEST_CASE3() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vxor.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, + 0xf1, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00, 0xf00f, + 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vxor.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, + 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, + 0x0ff00ff1, 0xff00ff00); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00); +} + +void TEST_CASE4() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vx v1, v2, %[A], 
v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + 0xef, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00, 0xf00f, + 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, + 0xff00ff00, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, + 0xdeadbeef, 0xff00ff00); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 
0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00); +} + +void TEST_CASE5() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vxor.vi v1, v2, 15"); + VCMP_U8(17, v1, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, + 0x0e, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vxor.vi v2, v4, 15"); + VCMP_U16(18, v2, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff, 0xfff0, + 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vxor.vi v4, v8, 15"); + VCMP_U32(19, v4, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, + 0xf0f0f0ff, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, + 0x0000000e, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff); +} + +void TEST_CASE6() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vi v1, v2, 15, v0.t"); + VCMP_U8(21, v1, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 
0xf0, 0xef, 0xff, 0xf0, + 0xef, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vi v2, v4, 15, v0.t"); + VCMP_U16(22, v2, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, + 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vi v4, v8, 15, v0.t"); + VCMP_U32(23, v4, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, + 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, + 0xdeadbeef, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); +} + +int main(void) { + INIT_CHECK(); + 
enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c new file mode 100644 index 000000000..0d24c220d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c @@ -0,0 +1,106 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + asm volatile("vzext.vf2 v2, v1"); + VCMP_U16(1, v2, 1, 2, 253, 252, 5, 6, 249, 248); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + asm volatile("vzext.vf2 v2, v1"); + VCMP_U32(2, v2, 1, 2, 65533, 65532); + + VSET(16, e64, m1); + VLOAD_32(v8, 1, 2); + asm volatile("vzext.vf2 v0, v8"); + VCMP_U64(3, v0, 1, 2); +} + +void TEST_CASE2(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + VLOAD_8(v0, 0xAA); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U16(4, v2, 0, 2, 0, 252, 0, 6, 0, 248); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U32(5, v2, 0, 2, 0, 65532); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U64(6, v2, 0, 2); +} + +void TEST_CASE3(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + asm volatile("vzext.vf4 v2, v1"); + VCMP_U32(7, v2, 1, 2, 253, 252); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + asm volatile("vzext.vf4 v2, v1"); + VCMP_U64(8, v2, 1, 2); +} + +void TEST_CASE4(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vzext.vf4 v2, v1, v0.t"); + 
VCMP_U32(9, v2, 0, 2, 0, 252); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf4 v2, v1, v0.t"); + VCMP_U64(10, v2, 0, 2); +} + +void TEST_CASE5(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + asm volatile("vzext.vf8 v2, v1"); + VCMP_U64(11, v2, 1, 2); +} + +void TEST_CASE6(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf8 v2, v1, v0.t"); + VCMP_U64(12, v2, 0, 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +}