minor update

2025-04-24 13:57:17 -04:00 · 2020-12-30 04:09:21 -08:00 · 2020-12-30 04:09:21 -08:00 · e431162347
commit e431162347
parent d44144f72f
4 changed files with 81 additions and 85 deletions
--- a/hw/rtl/fp_cores/VX_fp_cvt.v
+++ b/hw/rtl/fp_cores/VX_fp_cvt.v
@ -32,7 +32,7 @@ module VX_fp_cvt #(
    input wire  ready_out,
    output wire valid_out
 );   
-    //! Constants
+    // Constants
 
    localparam MAN_BITS = 23;
    localparam EXP_BITS = 8;
@ -58,8 +58,7 @@ module VX_fp_cvt #(
    localparam NUM_FP_STICKY  = 2 * INT_MAN_WIDTH - MAN_BITS - 1;   // removed mantissa, 1. and R
    localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH;  // removed int and R
    
-    //*------------------------------------------------
-    //! Input processing
+    // Input processing
    
    fp_type_t [LANES-1:0] in_a_type;
      
@ -104,8 +103,7 @@ module VX_fp_cvt #(
        assign mant_is_zero[i] = ~mant_is_nonzero;
    end

-    //*------------------------------------------------
-    //! Stage0 pipeline
+    // Pipeline stage0
    
    wire                    valid_in_s0;
    wire [TAGW-1:0]         tag_in_s0;
@ -133,8 +131,7 @@ module VX_fp_cvt #(
        .data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0, renorm_shamt_s0, mant_is_zero_s0})
    );
    
-    //*------------------------------------------------
-    //! Normalization
+    // Normalization

    wire        [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant;      // normalized input mantissa    
    wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp;       // unbiased true exponent
@ -169,8 +166,7 @@ module VX_fp_cvt #(
    `IGNORE_WARNINGS_END
    end

-    //*------------------------------------------------
-    //! Stage1 pipeline
+    // Pipeline stage1
    
    wire                    valid_in_s1;
    wire [TAGW-1:0]         tag_in_s1;
@ -196,8 +192,7 @@ module VX_fp_cvt #(
        .data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
    );

-    //*------------------------------------------------
-    //! Casting
+    // Casting
    reg  [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp;          // after eventual adjustments

    reg  [LANES-1:0][2*INT_MAN_WIDTH:0]  preshift_mant;     // mantissa before final shift
@ -271,8 +266,7 @@ module VX_fp_cvt #(
    `IGNORE_WARNINGS_END
    end

-    //*------------------------------------------------
-    //! Rouding and classification
+    // Rounding and classification

    wire [LANES-1:0]        rounded_sign;
    wire [LANES-1:0][31:0]  rounded_abs;     // absolute value of result after rounding    
@ -302,8 +296,7 @@ module VX_fp_cvt #(
        );
    end

-    //*------------------------------------------------
-    //! Stage2 pipeline
+    // Pipeline stage2

    wire                    valid_in_s2;
    wire [TAGW-1:0]         tag_in_s2;
@ -348,8 +341,7 @@ module VX_fp_cvt #(
        assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
    end

-    //*------------------------------------------------
-    //! FP Special case handling
+    // FP Special case handling

    wire [LANES-1:0][31:0]  fp_special_result;
    fflags_t [LANES-1:0]    fp_special_status;
@ -370,8 +362,7 @@ module VX_fp_cvt #(
                                                              : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
    end

-    //*------------------------------------------------
-    //! INT Special case handling
+    // INT Special case handling

    reg [LANES-1:0][31:0]   int_special_result;
    fflags_t [LANES-1:0]    int_special_status;
@ -399,8 +390,7 @@ module VX_fp_cvt #(
        assign int_special_status[i] = {1'b1, 4'h0};
    end

-    //*------------------------------------------------
-    //! Result selection and Output handshake
+    // Result selection and Output handshake

    fflags_t [LANES-1:0] tmp_fflags;    
    wire [LANES-1:0][31:0] tmp_result;
--- a/hw/rtl/fp_cores/VX_fp_fma.v
+++ b/hw/rtl/fp_cores/VX_fp_fma.v
@ -34,13 +34,12 @@ module VX_fp_fma #(

    input wire  ready_out,
    output wire valid_out
-);    
-    
+);
+
    wire stall = ~ready_out && valid_out;
    wire enable = ~stall;

-    for (genvar i = 0; i < LANES; i++) begin
-
+    for (genvar i = 0; i < LANES; i++) begin       
        reg [31:0] a, b, c;

        always @(*) begin
--- a/hw/rtl/fp_cores/VX_fp_ncomp.v
+++ b/hw/rtl/fp_cores/VX_fp_ncomp.v
@ -38,27 +38,17 @@ module VX_fp_ncomp #(
                SIG_NAN     = 32'h00000100,
                QUT_NAN     = 32'h00000200;

-    reg valid_in_r;
-    reg [TAGW-1:0] tag_in_r;
-    reg [`FPU_BITS-1:0] op_type_r;
-    reg [`FRM_BITS-1:0] frm_r;
+    wire [LANES-1:0]        tmp_a_sign, tmp_b_sign;
+    wire [LANES-1:0][7:0]   tmp_a_exponent, tmp_b_exponent;
+    wire [LANES-1:0][22:0]  tmp_a_mantissa, tmp_b_mantissa;
+    fp_type_t [LANES-1:0]   tmp_a_type, tmp_b_type;
+    wire [LANES-1:0]        tmp_a_smaller, tmp_ab_equal;

-    reg [LANES-1:0][31:0]  dataa_r;
-    reg [LANES-1:0][31:0]  datab_r;
-
-    reg [LANES-1:0]       a_sign, b_sign, tmp_a_sign, tmp_b_sign;
-    reg [LANES-1:0][7:0]  a_exponent, tmp_a_exponent, tmp_b_exponent;
-    reg [LANES-1:0][22:0] a_mantissa, tmp_a_mantissa, tmp_b_mantissa;
-    fp_type_t [LANES-1:0] a_type, b_type, tmp_a_type, tmp_b_type;
-    reg [LANES-1:0] a_smaller, ab_equal, tmp_a_smaller, tmp_ab_equal;
-
-    reg [LANES-1:0][31:0] fclass_mask;  // generate a 10-bit mask for integer reg
-    reg [LANES-1:0][31:0] fminmax_res;  // result of fmin/fmax
-    reg [LANES-1:0][31:0] fsgnj_res;    // result of sign injection
-    reg [LANES-1:0][31:0] fcmp_res;     // result of comparison
-    fflags_t [LANES-1:0]  fcmp_fflags;  // comparison fflags
-
-    wire stall = ~ready_out && valid_out;
+    wire [LANES-1:0][31:0] fclass_mask;  // generate a 10-bit mask for integer reg
+    wire [LANES-1:0][31:0] fminmax_res;  // result of fmin/fmax
+    wire [LANES-1:0][31:0] fsgnj_res;    // result of sign injection
+    wire [LANES-1:0][31:0] fcmp_res;     // result of comparison
+    fflags_t [LANES-1:0]   fcmp_fflags;  // comparison fflags

    // Setup
    for (genvar i = 0; i < LANES; i++) begin
@ -86,6 +76,21 @@ module VX_fp_ncomp #(
        assign tmp_ab_equal[i]  = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
    end  

+    // Pipeline stage0
+
+    wire                    valid_in_s0;
+    wire [TAGW-1:0]         tag_in_s0;
+    wire [`FPU_BITS-1:0]    op_type_s0;
+    wire [`FRM_BITS-1:0]    frm_s0;
+    wire [LANES-1:0][31:0]  dataa_s0, datab_s0;
+    wire [LANES-1:0]        a_sign_s0, b_sign_s0;
+    wire [LANES-1:0][7:0]   a_exponent_s0;
+    wire [LANES-1:0][22:0]  a_mantissa_s0;
+    fp_type_t [LANES-1:0]   a_type_s0, b_type_s0;
+    wire [LANES-1:0]        a_smaller_s0, ab_equal_s0;
+
+    wire stall;
+
    VX_pipe_register #(
        .DATAW  (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
        .RESETW (1)
@ -93,27 +98,27 @@ module VX_fp_ncomp #(
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall),
-        .data_in  ({valid_in,   tag_in,   op_type,   frm,   dataa,   datab,   tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
-        .data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r, a_sign,     b_sign,     a_exponent,     a_mantissa,     a_type,     b_type,     a_smaller,     ab_equal})
+        .data_in  ({valid_in,    tag_in,    op_type,    frm,    dataa,    datab,    tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
+        .data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0,  b_sign_s0,  a_exponent_s0,  a_mantissa_s0,  a_type_s0,  b_type_s0,  a_smaller_s0,  ab_equal_s0})
    ); 

    // FCLASS
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin 
-            if (a_type[i].is_normal) begin
-                fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
+            if (a_type_s0[i].is_normal) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
            end 
-            else if (a_type[i].is_inf) begin
-                fclass_mask[i] = a_sign[i] ? NEG_INF : POS_INF;
+            else if (a_type_s0[i].is_inf) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
            end 
-            else if (a_type[i].is_zero) begin
-                fclass_mask[i] = a_sign[i] ? NEG_ZERO : POS_ZERO;
+            else if (a_type_s0[i].is_zero) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
            end 
-            else if (a_type[i].is_subnormal) begin
-                fclass_mask[i] = a_sign[i] ? NEG_SUBNORM : POS_SUBNORM;
+            else if (a_type_s0[i].is_subnormal) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
            end 
-            else if (a_type[i].is_nan) begin
-                fclass_mask[i] = {22'h0, a_type[i].is_quiet, a_type[i].is_signaling, 8'h0};
+            else if (a_type_s0[i].is_nan) begin
+                fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
            end 
            else begin                     
                fclass_mask[i] = QUT_NAN;
@ -124,16 +129,16 @@ module VX_fp_ncomp #(
    // Min/Max
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
-            if (a_type[i].is_nan && b_type[i].is_nan)
+            if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
                fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
-            else if (a_type[i].is_nan) 
-                fminmax_res[i] = datab_r[i];
-            else if (b_type[i].is_nan) 
-                fminmax_res[i] = dataa_r[i];
+            else if (a_type_s0[i].is_nan) 
+                fminmax_res[i] = datab_s0[i];
+            else if (b_type_s0[i].is_nan) 
+                fminmax_res[i] = dataa_s0[i];
            else begin 
-                case (frm_r) // use LSB to distinguish MIN and MAX
-                    3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
-                    4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
+                case (frm_s0) // use LSB to distinguish MIN and MAX
+                    3: fminmax_res[i] = a_smaller_s0[i] ? dataa_s0[i] : datab_s0[i];
+                    4: fminmax_res[i] = a_smaller_s0[i] ? datab_s0[i] : dataa_s0[i];
              default: fminmax_res[i] = 'x;  // don't care value
                endcase
            end
@ -143,10 +148,10 @@ module VX_fp_ncomp #(
    // Sign injection
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
-            case (frm_r)
-                0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
-                1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
-                2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
+            case (frm_s0)
+                0: fsgnj_res[i] = { b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
+                1: fsgnj_res[i] = {~b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
+                2: fsgnj_res[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
          default: fsgnj_res[i] = 'x;  // don't care value
            endcase
        end
@ -155,32 +160,32 @@ module VX_fp_ncomp #(
    // Comparison    
    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
-            case (frm_r)
+            case (frm_s0)
                `FRM_RNE: begin
                    fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                        fcmp_res[i]       = 32'h0;
                        fcmp_fflags[i].NV = 1'b1;
                    end else begin
-                        fcmp_res[i] = {31'h0, (a_smaller[i] | ab_equal[i])};
+                        fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
                    end
                end
                `FRM_RTZ: begin
                    fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                        fcmp_res[i]       = 32'h0;
                        fcmp_fflags[i].NV = 1'b1;
                    end else begin
-                        fcmp_res[i] = {31'h0, (a_smaller[i] & ~ab_equal[i])};
+                        fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
                    end                    
                end
                `FRM_RDN: begin
                    fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                        fcmp_res[i]       = 32'h0;
-                        fcmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling; 
+                        fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling; 
                    end else begin
-                        fcmp_res[i] = {31'h0, ab_equal[i]};
+                        fcmp_res[i] = {31'h0, ab_equal_s0[i]};
                    end
                end
                default: begin
@ -198,7 +203,7 @@ module VX_fp_ncomp #(

    for (genvar i = 0; i < LANES; i++) begin
        always @(*) begin
-            case (op_type_r)
+            case (op_type_s0)
                `FPU_CLASS: begin
                    tmp_result[i] = fclass_mask[i];
                    tmp_fflags[i] = 'x;
@ -209,7 +214,7 @@ module VX_fp_ncomp #(
                end      
                //`FPU_MISC:
                default: begin
-                    case (frm_r)
+                    case (frm_s0)
                        0,1,2: begin
                            tmp_result[i] = fsgnj_res[i];
                            tmp_fflags[i] = 'x;
@ -217,7 +222,7 @@ module VX_fp_ncomp #(
                        3,4: begin
                            tmp_result[i] = fminmax_res[i];
                            tmp_fflags[i] = 0;
-                            tmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling;
+                            tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
                        end
                        //5,6,7: 
                        default: begin
@ -230,8 +235,10 @@ module VX_fp_ncomp #(
        end
    end

-    wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm == 3 || frm == 4)) // MIN/MAX 
-                       || (op_type_r == `FPU_CMP); // CMP
+    wire tmp_has_fflags = ((op_type_s0 == `FPU_MISC) && (frm_s0 == 3 || frm_s0 == 4)) // MIN/MAX; frm_s0 is the stage-0 registered frm, in step with op_type_s0 (raw frm belongs to the incoming request)
+                       || (op_type_s0 == `FPU_CMP); // CMP
+
+    assign stall = ~ready_out && valid_out;

    VX_pipe_register #(
        .DATAW  (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
@ -240,8 +247,8 @@ module VX_fp_ncomp #(
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall),
-        .data_in  ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
-        .data_out ({valid_out,  tag_out,  result,     has_fflags,     fflags})
+        .data_in  ({valid_in_s0, tag_in_s0, tmp_result, tmp_has_fflags, tmp_fflags}),
+        .data_out ({valid_out,   tag_out,   result,     has_fflags,     fflags})
    );

    assign ready_in = ~stall;
--- a/hw/rtl/fp_cores/VX_fp_rounding.v
+++ b/hw/rtl/fp_cores/VX_fp_rounding.v
@ -34,7 +34,7 @@ module VX_fp_rounding #(
            `FRM_RNE: // Decide accoring to round/sticky bits
                case (round_sticky_bits_i)
                      2'b00, 
-                      2'b01: round_up = 1'b0;     // < ulp/2 away, round down
+                      2'b01: round_up = 1'b0;            // < ulp/2 away, round down
                      2'b10: round_up = abs_value_i[0];  // = ulp/2 away, round towards even result
                      2'b11: round_up = 1'b1;            // > ulp/2 away, round up
                    default: round_up = 1'bx;