mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fpu area optimization
This commit is contained in:
parent
33a83cc733
commit
377466ed1c
8 changed files with 186 additions and 174 deletions
|
@ -11,7 +11,7 @@ typedef struct packed {
|
|||
logic is_nan;
|
||||
logic is_quiet;
|
||||
logic is_signaling;
|
||||
} fp_type_t;
|
||||
} fp_class_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic NV; // 4-Invalid
|
||||
|
|
28
hw/rtl/fp_cores/VX_fp_class.v
Normal file
28
hw/rtl/fp_cores/VX_fp_class.v
Normal file
|
@ -0,0 +1,28 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Floating-point operand classifier.
//
// Takes the exponent and mantissa fields of an operand and raises the matching
// flags of the fp_class_t record: normal, zero, subnormal, infinity, NaN, plus
// the quiet/signaling split for NaNs.
//
// Parameters:
//   MAN_BITS - width of the mantissa field (man_i)
//   EXP_BITS - width of the exponent field (exp_i)
// Ports:
//   exp_i  - exponent field
//   man_i  - mantissa field
//   clss_o - classification flags
module VX_fp_class # (
    parameter MAN_BITS = 23,
    parameter EXP_BITS = 8
) (
    input [EXP_BITS-1:0] exp_i,
    input [MAN_BITS-1:0] man_i,
    output fp_class_t clss_o
);
    // Basic field predicates; every class flag is a combination of these.
    wire exp_all_zeros = ~(|exp_i);
    wire exp_all_ones  = &exp_i;
    wire man_all_zeros = ~(|man_i);

    // NaN: all-ones exponent with a non-zero mantissa.
    wire nan_detected = exp_all_ones & ~man_all_zeros;

    // The mantissa MSB separates quiet (1) from signaling (0) NaNs.
    wire man_msb = man_i[MAN_BITS-1];

    assign clss_o.is_normal    = ~exp_all_zeros & ~exp_all_ones;
    assign clss_o.is_zero      = exp_all_zeros & man_all_zeros;
    assign clss_o.is_subnormal = exp_all_zeros & ~man_all_zeros;
    assign clss_o.is_inf       = exp_all_ones & man_all_zeros;
    assign clss_o.is_nan       = nan_detected;
    assign clss_o.is_quiet     = nan_detected & man_msb;
    assign clss_o.is_signaling = nan_detected & ~man_msb;

endmodule
|
|
@ -59,13 +59,16 @@ module VX_fp_cvt #(
|
|||
|
||||
// Input processing
|
||||
|
||||
fp_type_t [LANES-1:0] in_a_type;
|
||||
fp_class_t [LANES-1:0] fp_clss;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
VX_fp_type fp_type (
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class (
|
||||
.exp_i (dataa[i][30:23]),
|
||||
.man_i (dataa[i][22:0]),
|
||||
.type_o (in_a_type[i])
|
||||
.clss_o (fp_clss[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -74,16 +77,19 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] input_sign;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [INT_MAN_WIDTH-1:0] int_mantissa;
|
||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
wire fmt_sign = dataa[i][31];
|
||||
wire int_sign = dataa[i][31] & is_signed;
|
||||
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({fp_clss[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} +
|
||||
{1'b0, fp_clss[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS);
|
||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -93,7 +99,7 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s0;
|
||||
wire unsigned_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
fp_class_t [LANES-1:0] fp_clss_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
|
@ -101,14 +107,14 @@ module VX_fp_cvt #(
|
|||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, fp_clss, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
// Normalization
|
||||
|
@ -119,8 +125,8 @@ module VX_fp_cvt #(
|
|||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire mant_is_nonzero;
|
||||
VX_lzc #(
|
||||
.WIDTH (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
.N (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
) lzc (
|
||||
.in_i (encoded_mant_s0[i]),
|
||||
.cnt_o (renorm_shamt_s0[i]),
|
||||
|
@ -134,20 +140,12 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||
{1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp = fmt_exponent_s0[i] - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
@ -160,21 +158,21 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s1;
|
||||
wire unsigned_s1;
|
||||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
fp_class_t [LANES-1:0] fp_clss_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
);
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
@ -245,7 +243,7 @@ module VX_fp_cvt #(
|
|||
wire is_itof_s2;
|
||||
wire unsigned_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fp_type_t [LANES-1:0] in_a_type_s2;
|
||||
fp_class_t [LANES-1:0] fp_clss_s2;
|
||||
wire [LANES-1:0] mant_is_zero_s2;
|
||||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
|
||||
|
@ -253,14 +251,14 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_class_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
|
@ -314,7 +312,7 @@ module VX_fp_cvt #(
|
|||
wire [TAGW-1:0] tag_in_s3;
|
||||
wire is_itof_s3;
|
||||
wire unsigned_s3;
|
||||
fp_type_t [LANES-1:0] in_a_type_s3;
|
||||
fp_class_t [LANES-1:0] fp_clss_s3;
|
||||
wire [LANES-1:0] mant_is_zero_s3;
|
||||
wire [LANES-1:0] input_sign_s3;
|
||||
wire [LANES-1:0] rounded_sign_s3;
|
||||
|
@ -322,14 +320,14 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] of_before_round_s3;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, fp_clss_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
|
@ -362,14 +360,14 @@ module VX_fp_cvt #(
|
|||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Detect special case from source format, I2F casts don't produce a special result
|
||||
assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan);
|
||||
assign fp_result_is_special[i] = ~is_itof_s3 & (fp_clss_s3[i].is_zero | fp_clss_s3[i].is_nan);
|
||||
|
||||
// Signalling input NaNs raise invalid flag, otherwise no flags set
|
||||
assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
assign fp_special_status[i] = fp_clss_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
|
||||
|
||||
// Assemble result according to destination format
|
||||
assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
assign fp_special_result[i] = fp_clss_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
end
|
||||
|
||||
// INT Special case handling
|
||||
|
@ -381,7 +379,7 @@ module VX_fp_cvt #(
|
|||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Assemble result according to destination format
|
||||
always @(*) begin
|
||||
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
|
||||
if (input_sign_s3[i] && !fp_clss_s3[i].is_nan) begin
|
||||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
|
@ -391,8 +389,8 @@ module VX_fp_cvt #(
|
|||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
|
||||
| in_a_type_s3[i].is_inf
|
||||
assign int_result_is_special[i] = fp_clss_s3[i].is_nan
|
||||
| fp_clss_s3[i].is_inf
|
||||
| of_before_round_s3[i]
|
||||
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
|
||||
|
||||
|
@ -411,11 +409,11 @@ module VX_fp_cvt #(
|
|||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
: (| fp_round_sticky_bits[i]) | (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
|
|
|
@ -30,6 +30,9 @@ module VX_fp_ncomp #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam EXP_BITS = 8;
|
||||
localparam MAN_BITS = 23;
|
||||
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
|
@ -44,7 +47,7 @@ module VX_fp_ncomp #(
|
|||
wire [LANES-1:0] tmp_a_sign, tmp_b_sign;
|
||||
wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent;
|
||||
wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
|
||||
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
|
||||
fp_class_t [LANES-1:0] tmp_a_clss, tmp_b_clss;
|
||||
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
|
||||
|
||||
// Setup
|
||||
|
@ -57,20 +60,26 @@ module VX_fp_ncomp #(
|
|||
assign tmp_b_exponent[i] = datab[i][30:23];
|
||||
assign tmp_b_mantissa[i] = datab[i][22:0];
|
||||
|
||||
VX_fp_type fp_type_a (
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_a (
|
||||
.exp_i (tmp_a_exponent[i]),
|
||||
.man_i (tmp_a_mantissa[i]),
|
||||
.type_o (tmp_a_type[i])
|
||||
.clss_o (tmp_a_clss[i])
|
||||
);
|
||||
|
||||
VX_fp_type fp_type_b (
|
||||
VX_fp_class #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_b (
|
||||
.exp_i (tmp_b_exponent[i]),
|
||||
.man_i (tmp_b_mantissa[i]),
|
||||
.type_o (tmp_b_type[i])
|
||||
.clss_o (tmp_b_clss[i])
|
||||
);
|
||||
|
||||
assign tmp_a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]);
|
||||
assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
|
||||
assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_clss[i].is_zero & tmp_b_clss[i].is_zero);
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -83,41 +92,41 @@ module VX_fp_ncomp #(
|
|||
wire [LANES-1:0] a_sign_s0, b_sign_s0;
|
||||
wire [LANES-1:0][7:0] a_exponent_s0;
|
||||
wire [LANES-1:0][22:0] a_mantissa_s0;
|
||||
fp_type_t [LANES-1:0] a_type_s0, b_type_s0;
|
||||
fp_class_t [LANES-1:0] a_clss_s0, b_clss_s0;
|
||||
wire [LANES-1:0] a_smaller_s0, ab_equal_s0;
|
||||
|
||||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)),
|
||||
.RESETW (1),
|
||||
.DEPTH (0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_clss, tmp_b_clss, tmp_a_smaller, tmp_ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_clss_s0, b_clss_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_normal) begin
|
||||
if (a_clss_s0[i].is_normal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_inf) begin
|
||||
else if (a_clss_s0[i].is_inf) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
|
||||
end
|
||||
else if (a_type_s0[i].is_zero) begin
|
||||
else if (a_clss_s0[i].is_zero) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
else if (a_type_s0[i].is_subnormal) begin
|
||||
else if (a_clss_s0[i].is_subnormal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
|
||||
else if (a_clss_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_clss_s0[i].is_quiet, a_clss_s0[i].is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
fclass_mask[i] = QUT_NAN;
|
||||
|
@ -129,11 +138,11 @@ module VX_fp_ncomp #(
|
|||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
|
||||
if (a_clss_s0[i].is_nan && b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_type_s0[i].is_nan)
|
||||
else if (a_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = datab_s0[i];
|
||||
else if (b_type_s0[i].is_nan)
|
||||
else if (b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = dataa_s0[i];
|
||||
else begin
|
||||
case (frm_s0) // use LSB to distinguish MIN and MAX
|
||||
|
@ -166,7 +175,7 @@ module VX_fp_ncomp #(
|
|||
case (frm_s0)
|
||||
`INST_FRM_RNE: begin // LE
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
|
@ -175,7 +184,7 @@ module VX_fp_ncomp #(
|
|||
end
|
||||
`INST_FRM_RTZ: begin // LS
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
|
@ -184,9 +193,9 @@ module VX_fp_ncomp #(
|
|||
end
|
||||
`INST_FRM_RDN: begin // EQ
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
fcmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, ab_equal_s0[i]};
|
||||
end
|
||||
|
@ -225,11 +234,11 @@ module VX_fp_ncomp #(
|
|||
3,4: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
tmp_fflags[i] = 0;
|
||||
tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
tmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end
|
||||
//5,6,7: MOVE
|
||||
default: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
tmp_result[i] = dataa_s0[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
endcase
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Classifier for an operand with an 8-bit exponent and a 23-bit mantissa.
//
// Ports:
//   exp_i  - 8-bit exponent field
//   man_i  - 23-bit mantissa field
//   type_o - classification flags (normal, zero, subnormal, inf, nan,
//            quiet, signaling)
module VX_fp_type (
    // inputs
    input [7:0] exp_i,
    input [22:0] man_i,
    // outputs
    output fp_type_t type_o
);
    // Exponent extremes and mantissa emptiness drive every flag below.
    wire exp_is_min  = (exp_i == 8'h00);
    wire exp_is_max  = (exp_i == 8'hff);
    wire man_nonzero = (man_i != 23'd0);

    // NaN: maximum exponent with a non-zero mantissa.
    wire nan_any = exp_is_max && man_nonzero;

    assign type_o.is_normal    = !(exp_is_min || exp_is_max);
    assign type_o.is_zero      = exp_is_min && !man_nonzero;
    assign type_o.is_subnormal = exp_is_min && man_nonzero;
    assign type_o.is_inf       = exp_is_max && !man_nonzero;
    assign type_o.is_nan       = nan_any;
    // Mantissa bit 22 set => quiet NaN, clear => signaling NaN.
    assign type_o.is_quiet     = nan_any && (man_i[22] == 1'b1);
    assign type_o.is_signaling = nan_any && (man_i[22] == 1'b0);

endmodule
|
|
@ -1,7 +1,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fpu_fpga #(
|
||||
parameter TAGW = 1
|
||||
parameter TAGW = 4
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
61
hw/rtl/libs/VX_find_first.v
Normal file
61
hw/rtl/libs/VX_find_first.v
Normal file
|
@ -0,0 +1,61 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// Priority selector: forwards the data word of the first valid entry.
//
// Entries are scanned from index 0 upwards, or from index N-1 downwards when
// REVERSE is non-zero. The selection is built as a binary tree in which the
// left (earlier) child always wins over the right one.
//
// Parameters:
//   N       - number of entries
//   DATAW   - width of each data word
//   REVERSE - non-zero to scan from the highest index
// Ports:
//   data_i  - per-entry data words
//   valid_i - per-entry valid flags
//   data_o  - data word of the first valid entry; when no entry is valid the
//             value is whatever the right-most tree path carries
//   valid_o - OR of all valid flags
module VX_find_first #(
    parameter N = 1,
    parameter DATAW = 1,
    parameter REVERSE = 0,
    localparam LOGN = $clog2(N)
) (
    input wire [N-1:0][DATAW-1:0] data_i,
    input wire [N-1:0] valid_i,
    output wire [DATAW-1:0] data_o,
    output wire valid_o
);
    if (N <= 1) begin
        // Single entry: nothing to arbitrate.
        assign valid_o = valid_i;
        assign data_o = data_i[0];
    end else begin
        // Inputs in scan order (mirrored when REVERSE is set).
        wire [N-1:0] vld_ord;
        wire [N-1:0][DATAW-1:0] dat_ord;

        for (genvar k = 0; k < N; ++k) begin
            localparam SRC = REVERSE ? (N-1-k) : k;
            assign vld_ord[k] = valid_i[SRC];
            assign dat_ord[k] = data_i[SRC];
        end

        // Tree nodes stored heap-style: node n has children 2n+1 and 2n+2,
        // and node 0 is the root.
    `IGNORE_WARNINGS_BEGIN
        wire [2**LOGN-1:0] node_vld;
        wire [2**LOGN-1:0][DATAW-1:0] node_dat;
    `IGNORE_WARNINGS_END

        for (genvar lvl = 0; lvl < LOGN; ++lvl) begin
            for (genvar pos = 0; pos < 2**lvl; ++pos) begin
                localparam NODE = 2**lvl - 1 + pos;
                if (lvl == (LOGN-1)) begin
                    // Leaf level: each node covers input pair (2*pos, 2*pos+1).
                    localparam LO = pos * 2;
                    if (LO < (N-1)) begin
                        // both inputs of the pair exist
                        assign node_vld[NODE] = vld_ord[LO] | vld_ord[LO+1];
                        assign node_dat[NODE] = vld_ord[LO] ? dat_ord[LO] : dat_ord[LO+1];
                    end else if (LO == (N-1)) begin
                        // only the first input of the pair exists
                        assign node_vld[NODE] = vld_ord[LO];
                        assign node_dat[NODE] = dat_ord[LO];
                    end else begin
                        // padding node beyond the last input
                        assign node_vld[NODE] = 0;
                        assign node_dat[NODE] = 'x;
                    end
                end else begin
                    // Inner level: merge the two children, left child first.
                    localparam LEFT  = 2*NODE + 1;
                    localparam RIGHT = 2*NODE + 2;
                    assign node_vld[NODE] = node_vld[LEFT] | node_vld[RIGHT];
                    assign node_dat[NODE] = node_vld[LEFT] ? node_dat[LEFT] : node_dat[RIGHT];
                end
            end
        end

        assign valid_o = node_vld[0];
        assign data_o = node_dat[0];
    end

endmodule
|
||||
`TRACING_ON
|
|
@ -1,88 +1,31 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
/// Modified port of lzc module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
/// A trailing zero counter / leading zero counter.
|
||||
/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
|
||||
/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB)
|
||||
/// If the input does not contain a one, `valid_o` is deasserted. Additionally `cnt_o` contains
|
||||
/// the maximum number of zeros - 1. For example:
|
||||
/// in_i = 000_0000, valid_o = 0, cnt_o = 6 (mode = 0)
|
||||
/// in_i = 000_0001, valid_o = 1, cnt_o = 0 (mode = 0)
|
||||
/// in_i = 000_1000, valid_o = 1, cnt_o = 3 (mode = 0)
|
||||
/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
|
||||
/// This speeds up simulation significantly.
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_lzc #(
|
||||
/// The width of the input vector.
|
||||
parameter int unsigned WIDTH = 2,
|
||||
parameter bit MODE = 1'b0 // 0 -> trailing zero, 1 -> leading zero
|
||||
parameter N = 2,
|
||||
parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero
|
||||
localparam LOGN = $clog2(N)
|
||||
) (
|
||||
input logic [WIDTH-1:0] in_i,
|
||||
output logic [$clog2(WIDTH)-1:0] cnt_o,
|
||||
output logic valid_o
|
||||
input wire [N-1:0] in_i,
|
||||
output wire [LOGN-1:0] cnt_o,
|
||||
output wire valid_o
|
||||
);
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [N-1:0][LOGN-1:0] indices;
|
||||
|
||||
localparam int unsigned NUM_LEVELS = $clog2(WIDTH);
|
||||
|
||||
// pragma translate_off
|
||||
initial begin
|
||||
assert(WIDTH > 0) else $fatal("input must be at least one bit wide");
|
||||
end
|
||||
// pragma translate_on
|
||||
|
||||
logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut;
|
||||
logic [2**NUM_LEVELS-1:0] sel_nodes;
|
||||
logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes;
|
||||
|
||||
logic [WIDTH-1:0] in_tmp;
|
||||
|
||||
// reverse vector if required
|
||||
always_comb begin : flip_vector
|
||||
for (int unsigned i = 0; i < WIDTH; i++) begin
|
||||
in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
|
||||
end
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
assign indices[i] = MODE ? LOGN'(N-1-i) : LOGN'(i);
|
||||
end
|
||||
|
||||
for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
|
||||
assign index_lut[j] = NUM_LEVELS'(unsigned'(j));
|
||||
end
|
||||
|
||||
for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels
|
||||
if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level
|
||||
for (genvar k = 0; k < 2**level; k++) begin : g_level
|
||||
// if two successive indices are still in the vector...
|
||||
if (unsigned'(k) * 2 < WIDTH-1) begin
|
||||
assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1];
|
||||
assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] :
|
||||
index_lut[k*2+1];
|
||||
end
|
||||
// if only the first index is still in the vector...
|
||||
if (unsigned'(k) * 2 == WIDTH-1) begin
|
||||
assign sel_nodes[2**level-1+k] = in_tmp[k*2];
|
||||
assign index_nodes[2**level-1+k] = index_lut[k*2];
|
||||
end
|
||||
// if index is out of range
|
||||
if (unsigned'(k) * 2 > WIDTH-1) begin
|
||||
assign sel_nodes[2**level-1+k] = 1'b0;
|
||||
assign index_nodes[2**level-1+k] = '0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
for (genvar l = 0; l < 2**level; l++) begin : g_level
|
||||
assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
|
||||
assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] :
|
||||
index_nodes[2**(level+1)-1+l*2+1];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign cnt_o = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0);
|
||||
assign valid_o = NUM_LEVELS > unsigned'(0) ? sel_nodes[0] : (|in_i);
|
||||
|
||||
`IGNORE_WARNINGS_END
|
||||
VX_find_first #(
|
||||
.N (N),
|
||||
.DATAW (LOGN),
|
||||
.REVERSE (MODE)
|
||||
) find_first (
|
||||
.data_i (indices),
|
||||
.valid_i (in_i),
|
||||
.data_o (cnt_o),
|
||||
.valid_o (valid_o)
|
||||
);
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
Loading…
Add table
Add a link
Reference in a new issue