diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 964ef742..7abe3304 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -443,7 +443,11 @@ module fpnew_cast_multi #( // By default right shift mantissa to be an integer denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); // overflow: when converting to unsigned the range is larger by one - if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + if ((input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) // Exponent larger than max int range, + && !(!op_mod_q2 // unless cast to signed int + && input_sign_q // and input value is largest negative int value + && (input_exp_q == signed'(fpnew_pkg::int_width(int_fmt_q2) - 1)) + && (input_mant_q == {1'b1, {INT_MAN_WIDTH-1{1'b0}}}))) begin denorm_shamt = '0; // prevent shifting of_before_round = 1'b1; // underflow diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index a8b00495..56a2f5d6 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -207,7 +207,7 @@ module fpnew_divsqrt_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 051e6a69..6fdd8905 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -613,7 +613,9 @@ module fpnew_fma #( ); // Classification after rounding - assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign uf_after_round = 
(rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones // ----------------- diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index e691f677..471d966f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -745,8 +745,10 @@ module fpnew_fma_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : post_process - // detect of / uf - fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + // detect of / uf + fmt_uf_after_round[fmt] = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_i == fpnew_pkg::RNE) || (rnd_mode_i == fpnew_pkg::RMM))))); fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. // Assemble regular result, nan box short ones. 
diff --git a/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v index 87139a25..d22e85ba 100644 --- a/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +++ b/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v @@ -222,7 +222,7 @@ end assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign, ex4_expnt_rst[7:0], ex4_frac_23[22:0]}; -assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) +assign ex4_cor_uf = (fdsu_ex4_uf || ex4_denorm_potnt_norm || ex4_uf_plus) && fdsu_ex4_nx; assign ex4_cor_nx = fdsu_ex4_nx || fdsu_ex4_of diff --git a/vendor/patches/opene906/0001-fdsu.pack-Correct-Underflow-logic.patch b/vendor/patches/opene906/0001-fdsu.pack-Correct-Underflow-logic.patch new file mode 100644 index 00000000..f42f3f1f --- /dev/null +++ b/vendor/patches/opene906/0001-fdsu.pack-Correct-Underflow-logic.patch @@ -0,0 +1,30 @@ +From e441ef74e80c7efe93ccacd60a03cf75e8167394 Mon Sep 17 00:00:00 2001 +From: Greg Davill +Date: Tue, 11 Jul 2023 15:10:57 +0930 +Subject: [PATCH] fdsu.pack: Correct Underflow logic + +Handle correct behavior when executing DIV instruction. +Flag underflow if the result with unbounded exponent would lie between ++/-b^(emin), even if the rounded result is exactly [+/-]01.000000. +Use ex4_denorm_potnt_norm, a flag that is set when a denormal result +rounds to a normal result. 
+--- + E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +index 87139a2..d22e85b 100644 +--- a/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v ++++ b/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +@@ -222,7 +222,7 @@ end + assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign, + ex4_expnt_rst[7:0], + ex4_frac_23[22:0]}; +-assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) ++assign ex4_cor_uf = (fdsu_ex4_uf || ex4_denorm_potnt_norm || ex4_uf_plus) + && fdsu_ex4_nx; + assign ex4_cor_nx = fdsu_ex4_nx + || fdsu_ex4_of +-- +2.38.0.windows.1 +