mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
relaxing fcvt critical path
This commit is contained in:
parent
2372067817
commit
d7bce5ab45
1 changed files with 165 additions and 150 deletions
|
@ -86,22 +86,6 @@ module VX_fp_cvt #(
|
|||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
end
|
||||
|
||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount
|
||||
wire [LANES-1:0] mant_is_zero; // for integer zeroes
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire mant_is_nonzero;
|
||||
VX_lzc #(
|
||||
.WIDTH (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
) lzc (
|
||||
.in_i (encoded_mant[i]),
|
||||
.cnt_o (renorm_shamt[i]),
|
||||
.valid_o (mant_is_nonzero)
|
||||
);
|
||||
assign mant_is_zero[i] = ~mant_is_nonzero;
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
wire valid_in_s0;
|
||||
|
@ -112,29 +96,42 @@ module VX_fp_cvt #(
|
|||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
|
||||
wire [LANES-1:0] mant_is_zero_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
|
||||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 +INT_EXP_WIDTH + INT_MAN_WIDTH + LZC_RESULT_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant, renorm_shamt, mant_is_zero}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0, renorm_shamt_s0, mant_is_zero_s0})
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
// Normalization
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount
|
||||
wire [LANES-1:0] mant_is_zero_s0; // for integer zeroes
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire mant_is_nonzero;
|
||||
VX_lzc #(
|
||||
.WIDTH (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
) lzc (
|
||||
.in_i (encoded_mant_s0[i]),
|
||||
.cnt_o (renorm_shamt_s0[i]),
|
||||
.valid_o (mant_is_nonzero)
|
||||
);
|
||||
assign mant_is_zero_s0[i] = ~mant_is_nonzero;
|
||||
end
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s0; // normalized input mantissa
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s0; // unbiased true exponent
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
|
@ -142,7 +139,7 @@ module VX_fp_cvt #(
|
|||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
|
@ -152,96 +149,118 @@ module VX_fp_cvt #(
|
|||
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
|
||||
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
|
||||
wire [LANES-1:0] of_before_round_s0;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s0) begin
|
||||
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp[i]) < $signed(1)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
assign preshift_mant_s0[i] = preshift_mant;
|
||||
assign denorm_shamt_s0[i] = denorm_shamt;
|
||||
assign final_exp_s0[i] = final_exp;
|
||||
assign of_before_round_s0[i] = of_before_round;
|
||||
end
|
||||
|
||||
// Pipeline stage1
|
||||
|
||||
|
||||
wire valid_in_s1;
|
||||
wire [TAGW-1:0] tag_in_s1;
|
||||
wire is_itof_s1;
|
||||
wire unsigned_s1;
|
||||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
);
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp) < $signed(1)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
assign destination_mant_s1[i] = preshift_mant >> denorm_shamt;
|
||||
assign final_exp_s1[i] = final_exp;
|
||||
assign of_before_round_s1[i] = of_before_round;
|
||||
end
|
||||
|
||||
// Pipeline stage2
|
||||
|
||||
wire valid_in_s2;
|
||||
wire [TAGW-1:0] tag_in_s2;
|
||||
wire is_itof_s2;
|
||||
wire unsigned_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fp_type_t [LANES-1:0] in_a_type_s2;
|
||||
wire [LANES-1:0] mant_is_zero_s2;
|
||||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s2;
|
||||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
|
@ -251,41 +270,37 @@ module VX_fp_cvt #(
|
|||
// Rouding and classification
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [2*INT_MAN_WIDTH:0] destination_mant;
|
||||
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
wire [1:0] round_sticky_bits;
|
||||
wire [31:0] fmt_pre_round_abs;
|
||||
wire [31:0] pre_round_abs;
|
||||
|
||||
// Mantissa adjustment shift
|
||||
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
|
||||
|
||||
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
||||
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant_s2[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant_s2[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
|
||||
// Collapse sticky bits
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant_s2[i][NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant_s2[i][NUM_INT_STICKY-1:0]);
|
||||
|
||||
// select RS bits for destination operation
|
||||
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
assign round_sticky_bits = is_itof_s2 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
|
||||
// Pack exponent and mantissa into proper rounding form
|
||||
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
|
||||
assign fmt_pre_round_abs = {1'b0, final_exp_s2[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
|
||||
|
||||
// Select output with destination format and operation
|
||||
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
|
||||
assign pre_round_abs = is_itof_s2 ? fmt_pre_round_abs : final_int;
|
||||
|
||||
// Perform the rounding
|
||||
VX_fp_rounding #(
|
||||
.DAT_WIDTH (32)
|
||||
) fp_rounding (
|
||||
.abs_value_i (pre_round_abs),
|
||||
.sign_i (input_sign_s1[i]),
|
||||
.sign_i (input_sign_s2[i]),
|
||||
.round_sticky_bits_i(round_sticky_bits),
|
||||
.rnd_mode_i (rnd_mode_s1),
|
||||
.rnd_mode_i (rnd_mode_s2),
|
||||
.effective_subtraction_i(1'b0),
|
||||
.abs_rounded_o (rounded_abs[i]),
|
||||
.sign_o (rounded_sign[i]),
|
||||
|
@ -293,28 +308,28 @@ module VX_fp_cvt #(
|
|||
);
|
||||
end
|
||||
|
||||
// Pipeline stage2
|
||||
// Pipeline stage3
|
||||
|
||||
wire valid_in_s2;
|
||||
wire [TAGW-1:0] tag_in_s2;
|
||||
wire is_itof_s2;
|
||||
wire unsigned_s2;
|
||||
fp_type_t [LANES-1:0] in_a_type_s2;
|
||||
wire [LANES-1:0] mant_is_zero_s2;
|
||||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0] rounded_sign_s2;
|
||||
wire [LANES-1:0][31:0] rounded_abs_s2;
|
||||
wire [LANES-1:0] of_before_round_s2;
|
||||
wire valid_in_s3;
|
||||
wire [TAGW-1:0] tag_in_s3;
|
||||
wire is_itof_s3;
|
||||
wire unsigned_s3;
|
||||
fp_type_t [LANES-1:0] in_a_type_s3;
|
||||
wire [LANES-1:0] mant_is_zero_s3;
|
||||
wire [LANES-1:0] input_sign_s3;
|
||||
wire [LANES-1:0] rounded_sign_s3;
|
||||
wire [LANES-1:0][31:0] rounded_abs_s3;
|
||||
wire [LANES-1:0] of_before_round_s3;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
|
@ -325,14 +340,14 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Assemble regular result, nan box short ones. Int zeroes need to be detected
|
||||
assign fmt_result[i] = (is_itof_s2 & mant_is_zero_s2[i]) ? 0 : {rounded_sign_s2[i], rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:0]};
|
||||
assign fmt_result[i] = (is_itof_s3 & mant_is_zero_s3[i]) ? 0 : {rounded_sign_s3[i], rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:0]};
|
||||
|
||||
// Classification after rounding select by destination format
|
||||
assign uf_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == 0); // denormal
|
||||
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
||||
assign uf_after_round[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == 0); // denormal
|
||||
assign of_after_round[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
||||
|
||||
// Negative integer result needs to be brought into two's complement
|
||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||
assign rounded_int_res[i] = rounded_sign_s3[i] ? (-rounded_abs_s3[i]) : rounded_abs_s3[i];
|
||||
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
|
||||
end
|
||||
|
||||
|
@ -347,13 +362,13 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Detect special case from source format, I2F casts don't produce a special result
|
||||
assign fp_result_is_special[i] = ~is_itof_s2 & (in_a_type_s2[i].is_zero | in_a_type_s2[i].is_nan);
|
||||
assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan);
|
||||
|
||||
// Signalling input NaNs raise invalid flag, otherwise no flags set
|
||||
assign fp_special_status[i] = in_a_type_s2[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
|
||||
// Assemble result according to destination format
|
||||
assign fp_special_result[i] = in_a_type_s2[i].is_zero ? (32'(input_sign_s2) << 31) // signed zero
|
||||
assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
end
|
||||
|
||||
|
@ -366,20 +381,20 @@ module VX_fp_cvt #(
|
|||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Assemble result according to destination format
|
||||
always @(*) begin
|
||||
if (input_sign_s2[i] && !in_a_type_s2[i].is_nan) begin
|
||||
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
|
||||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
|
||||
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
|
||||
int_special_result[i][31] = unsigned_s3; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
|
||||
| in_a_type_s2[i].is_inf
|
||||
| of_before_round_s2[i]
|
||||
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
|
||||
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
|
||||
| in_a_type_s3[i].is_inf
|
||||
| of_before_round_s3[i]
|
||||
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
|
||||
|
||||
// All integer special cases are invalid
|
||||
assign int_special_status[i] = {1'b1, 4'h0};
|
||||
|
@ -395,12 +410,12 @@ module VX_fp_cvt #(
|
|||
fflags_t fp_status, int_status;
|
||||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
|
||||
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
|
@ -413,8 +428,8 @@ module VX_fp_cvt #(
|
|||
assign int_status = int_result_is_special[i] ? int_special_status[i] : int_regular_status;
|
||||
|
||||
// Select output depending on special case detection
|
||||
assign tmp_result[i] = is_itof_s2 ? fp_result : int_result;
|
||||
assign tmp_fflags[i] = is_itof_s2 ? fp_status : int_status;
|
||||
assign tmp_result[i] = is_itof_s3 ? fp_result : int_result;
|
||||
assign tmp_fflags[i] = is_itof_s3 ? fp_status : int_status;
|
||||
end
|
||||
|
||||
assign stall = ~ready_out && valid_out;
|
||||
|
@ -422,11 +437,11 @@ module VX_fp_cvt #(
|
|||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg3 (
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({valid_in_s2, tag_in_s2, tmp_result, tmp_fflags}),
|
||||
.data_in ({valid_in_s3, tag_in_s3, tmp_result, tmp_fflags}),
|
||||
.data_out ({valid_out, tag_out, result, fflags})
|
||||
);
|
||||
|
||||
|
@ -434,4 +449,4 @@ module VX_fp_cvt #(
|
|||
|
||||
assign has_fflags = 1'b1;
|
||||
|
||||
endmodule
|
||||
endmodule
|
Loading…
Add table
Add a link
Reference in a new issue