minor update

This commit is contained in:
Blaise Tine 2020-12-30 04:09:21 -08:00
parent d44144f72f
commit e431162347
4 changed files with 81 additions and 85 deletions

View file

@ -32,7 +32,7 @@ module VX_fp_cvt #(
input wire ready_out,
output wire valid_out
);
//! Constants
// Constants
localparam MAN_BITS = 23;
localparam EXP_BITS = 8;
@ -58,8 +58,7 @@ module VX_fp_cvt #(
localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R
localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
//*------------------------------------------------
//! Input processing
// Input processing
fp_type_t [LANES-1:0] in_a_type;
@ -104,8 +103,7 @@ module VX_fp_cvt #(
assign mant_is_zero[i] = ~mant_is_nonzero;
end
//*------------------------------------------------
//! Stage0 pipeline
// Pipeline stage0
wire valid_in_s0;
wire [TAGW-1:0] tag_in_s0;
@ -133,8 +131,7 @@ module VX_fp_cvt #(
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0, renorm_shamt_s0, mant_is_zero_s0})
);
//*------------------------------------------------
//! Normalization
// Normalization
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
@ -169,8 +166,7 @@ module VX_fp_cvt #(
`IGNORE_WARNINGS_END
end
//*------------------------------------------------
//! Stage1 pipeline
// Pipeline stage1
wire valid_in_s1;
wire [TAGW-1:0] tag_in_s1;
@ -196,8 +192,7 @@ module VX_fp_cvt #(
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
);
//*------------------------------------------------
//! Casting
// Casting
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
@ -271,8 +266,7 @@ module VX_fp_cvt #(
`IGNORE_WARNINGS_END
end
//*------------------------------------------------
//! Rounding and classification
// Rounding and classification
wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
@ -302,8 +296,7 @@ module VX_fp_cvt #(
);
end
//*------------------------------------------------
//! Stage2 pipeline
// Pipeline stage2
wire valid_in_s2;
wire [TAGW-1:0] tag_in_s2;
@ -348,8 +341,7 @@ module VX_fp_cvt #(
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
end
//*------------------------------------------------
//! FP Special case handling
// FP Special case handling
wire [LANES-1:0][31:0] fp_special_result;
fflags_t [LANES-1:0] fp_special_status;
@ -370,8 +362,7 @@ module VX_fp_cvt #(
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
end
//*------------------------------------------------
//! INT Special case handling
// INT Special case handling
reg [LANES-1:0][31:0] int_special_result;
fflags_t [LANES-1:0] int_special_status;
@ -399,8 +390,7 @@ module VX_fp_cvt #(
assign int_special_status[i] = {1'b1, 4'h0};
end
//*------------------------------------------------
//! Result selection and Output handshake
// Result selection and Output handshake
fflags_t [LANES-1:0] tmp_fflags;
wire [LANES-1:0][31:0] tmp_result;

View file

@ -34,13 +34,12 @@ module VX_fp_fma #(
input wire ready_out,
output wire valid_out
);
);
wire stall = ~ready_out && valid_out;
wire enable = ~stall;
for (genvar i = 0; i < LANES; i++) begin
for (genvar i = 0; i < LANES; i++) begin
reg [31:0] a, b, c;
always @(*) begin

View file

@ -38,27 +38,17 @@ module VX_fp_ncomp #(
SIG_NAN = 32'h00000100,
QUT_NAN = 32'h00000200;
reg valid_in_r;
reg [TAGW-1:0] tag_in_r;
reg [`FPU_BITS-1:0] op_type_r;
reg [`FRM_BITS-1:0] frm_r;
wire [LANES-1:0] tmp_a_sign, tmp_b_sign;
wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent;
wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
reg [LANES-1:0][31:0] dataa_r;
reg [LANES-1:0][31:0] datab_r;
reg [LANES-1:0] a_sign, b_sign, tmp_a_sign, tmp_b_sign;
reg [LANES-1:0][7:0] a_exponent, tmp_a_exponent, tmp_b_exponent;
reg [LANES-1:0][22:0] a_mantissa, tmp_a_mantissa, tmp_b_mantissa;
fp_type_t [LANES-1:0] a_type, b_type, tmp_a_type, tmp_b_type;
reg [LANES-1:0] a_smaller, ab_equal, tmp_a_smaller, tmp_ab_equal;
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
reg [LANES-1:0][31:0] fsgnj_res; // result of sign injection
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
wire stall = ~ready_out && valid_out;
wire [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
wire [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
wire [LANES-1:0][31:0] fsgnj_res; // result of sign injection
wire [LANES-1:0][31:0] fcmp_res; // result of comparison
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
// Setup
for (genvar i = 0; i < LANES; i++) begin
@ -86,6 +76,21 @@ module VX_fp_ncomp #(
assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
end
// Pipeline stage0
wire valid_in_s0;
wire [TAGW-1:0] tag_in_s0;
wire [`FPU_BITS-1:0] op_type_s0;
wire [`FRM_BITS-1:0] frm_s0;
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
wire [LANES-1:0] a_sign_s0, b_sign_s0;
wire [LANES-1:0][7:0] a_exponent_s0;
wire [LANES-1:0][22:0] a_mantissa_s0;
fp_type_t [LANES-1:0] a_type_s0, b_type_s0;
wire [LANES-1:0] a_smaller_s0, ab_equal_s0;
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
.RESETW (1)
@ -93,27 +98,27 @@ module VX_fp_ncomp #(
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r, a_sign, b_sign, a_exponent, a_mantissa, a_type, b_type, a_smaller, ab_equal})
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
);
// FCLASS
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type[i].is_normal) begin
fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
if (a_type_s0[i].is_normal) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
end
else if (a_type[i].is_inf) begin
fclass_mask[i] = a_sign[i] ? NEG_INF : POS_INF;
else if (a_type_s0[i].is_inf) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
end
else if (a_type[i].is_zero) begin
fclass_mask[i] = a_sign[i] ? NEG_ZERO : POS_ZERO;
else if (a_type_s0[i].is_zero) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
end
else if (a_type[i].is_subnormal) begin
fclass_mask[i] = a_sign[i] ? NEG_SUBNORM : POS_SUBNORM;
else if (a_type_s0[i].is_subnormal) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
end
else if (a_type[i].is_nan) begin
fclass_mask[i] = {22'h0, a_type[i].is_quiet, a_type[i].is_signaling, 8'h0};
else if (a_type_s0[i].is_nan) begin
fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
end
else begin
fclass_mask[i] = QUT_NAN;
@ -124,16 +129,16 @@ module VX_fp_ncomp #(
// Min/Max
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type[i].is_nan && b_type[i].is_nan)
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
else if (a_type[i].is_nan)
fminmax_res[i] = datab_r[i];
else if (b_type[i].is_nan)
fminmax_res[i] = dataa_r[i];
else if (a_type_s0[i].is_nan)
fminmax_res[i] = datab_s0[i];
else if (b_type_s0[i].is_nan)
fminmax_res[i] = dataa_s0[i];
else begin
case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
case (frm_s0) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller_s0[i] ? dataa_s0[i] : datab_s0[i];
4: fminmax_res[i] = a_smaller_s0[i] ? datab_s0[i] : dataa_s0[i];
default: fminmax_res[i] = 'x; // don't care value
endcase
end
@ -143,10 +148,10 @@ module VX_fp_ncomp #(
// Sign injection
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_r)
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
case (frm_s0)
0: fsgnj_res[i] = { b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
1: fsgnj_res[i] = {~b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
2: fsgnj_res[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
default: fsgnj_res[i] = 'x; // don't care value
endcase
end
@ -155,32 +160,32 @@ module VX_fp_ncomp #(
// Comparison
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_r)
case (frm_s0)
`FRM_RNE: begin
fcmp_fflags[i] = 5'h0;
if (a_type[i].is_nan || b_type[i].is_nan) begin
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = 1'b1;
end else begin
fcmp_res[i] = {31'h0, (a_smaller[i] | ab_equal[i])};
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
end
end
`FRM_RTZ: begin
fcmp_fflags[i] = 5'h0;
if (a_type[i].is_nan || b_type[i].is_nan) begin
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = 1'b1;
end else begin
fcmp_res[i] = {31'h0, (a_smaller[i] & ~ab_equal[i])};
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
end
end
`FRM_RDN: begin
fcmp_fflags[i] = 5'h0;
if (a_type[i].is_nan || b_type[i].is_nan) begin
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling;
fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
end else begin
fcmp_res[i] = {31'h0, ab_equal[i]};
fcmp_res[i] = {31'h0, ab_equal_s0[i]};
end
end
default: begin
@ -198,7 +203,7 @@ module VX_fp_ncomp #(
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (op_type_r)
case (op_type_s0)
`FPU_CLASS: begin
tmp_result[i] = fclass_mask[i];
tmp_fflags[i] = 'x;
@ -209,7 +214,7 @@ module VX_fp_ncomp #(
end
//`FPU_MISC:
default: begin
case (frm_r)
case (frm_s0)
0,1,2: begin
tmp_result[i] = fsgnj_res[i];
tmp_fflags[i] = 'x;
@ -217,7 +222,7 @@ module VX_fp_ncomp #(
3,4: begin
tmp_result[i] = fminmax_res[i];
tmp_fflags[i] = 0;
tmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling;
tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
end
//5,6,7:
default: begin
@ -230,8 +235,10 @@ module VX_fp_ncomp #(
end
end
wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm == 3 || frm == 4)) // MIN/MAX
|| (op_type_r == `FPU_CMP); // CMP
// Tells the output stage whether this operation produces exception flags:
// MIN/MAX (FPU_MISC with frm 3 or 4) and CMP do.
// Both op_type and frm are taken from the stage0 register outputs so that this
// bit describes the same instruction as valid_in_s0/tag_in_s0, which are latched
// alongside it by the following pipe register. Using the raw `frm` input here
// would sample the rounding-mode field of whatever instruction is currently
// being presented at the module inputs.
wire tmp_has_fflags = ((op_type_s0 == `FPU_MISC) && (frm_s0 == 3 || frm_s0 == 4)) // MIN/MAX
                   || (op_type_s0 == `FPU_CMP); // CMP
assign stall = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
@ -240,8 +247,8 @@ module VX_fp_ncomp #(
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
.data_out ({valid_out, tag_out, result, has_fflags, fflags})
.data_in ({valid_in_s0, tag_in_s0, tmp_result, tmp_has_fflags, tmp_fflags}),
.data_out ({valid_out, tag_out, result, has_fflags, fflags})
);
assign ready_in = ~stall;

View file

@ -34,7 +34,7 @@ module VX_fp_rounding #(
`FRM_RNE: // Decide according to round/sticky bits
case (round_sticky_bits_i)
2'b00,
2'b01: round_up = 1'b0; // < ulp/2 away, round down
2'b01: round_up = 1'b0; // < ulp/2 away, round down
2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
2'b11: round_up = 1'b1; // > ulp/2 away, round up
default: round_up = 1'bx;