mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
timing optimization
This commit is contained in:
parent
ade6b2c985
commit
bcf7d9f960
7 changed files with 80 additions and 76 deletions
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of cast module from fpnew Library
|
||||
// Modified port of cast module from fpnew Library
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
parameter LATENCY = 1,
|
||||
parameter INT_WIDTH = 32,
|
||||
parameter MAN_BITS = 23,
|
||||
parameter EXP_BITS = 8
|
||||
parameter EXP_BITS = 8,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
input wire is_signed,
|
||||
|
||||
input wire [31:0] dataa,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
// Constants
|
||||
localparam EXP_BIAS = 2**(EXP_BITS-1)-1;
|
||||
|
||||
|
@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS;
|
||||
localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R
|
||||
localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R
|
||||
|
||||
|
||||
// Input processing
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
|
||||
fclass_t fclass;
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_classifier (
|
||||
|
@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire [S_EXP_WIDTH-1:0] input_exp;
|
||||
wire input_sign;
|
||||
|
||||
|
||||
wire i2f_sign = dataa[INT_WIDTH-1];
|
||||
wire f2i_sign = dataa[INT_WIDTH-1] && is_signed;
|
||||
wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa) : dataa;
|
||||
|
@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
assign input_sign = is_itof ? f2i_sign : i2f_sign;
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
|
||||
wire is_itof_s0;
|
||||
wire is_signed_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
|
@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH),
|
||||
.DEPTH (LATENCY > 2)
|
||||
.DEPTH (LATENCY > 1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}),
|
||||
.data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
|
||||
// Normalization
|
||||
|
||||
wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount
|
||||
|
@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_out (renorm_shamt_s0),
|
||||
.valid_out (mant_is_nonzero_s0)
|
||||
);
|
||||
|
||||
|
||||
wire mant_is_zero_s0 = ~mant_is_nonzero_s0;
|
||||
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa
|
||||
wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent
|
||||
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0;
|
||||
|
||||
|
@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 2)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
wire of_before_round_s1 = overflow;
|
||||
|
||||
// Pipeline stage2
|
||||
|
||||
|
||||
wire is_itof_s2;
|
||||
wire is_signed_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fclass_t fclass_s2;
|
||||
fclass_t fclass_s2;
|
||||
wire mant_is_zero_s2;
|
||||
wire input_sign_s2;
|
||||
wire [2*S_MAN_WIDTH:0] destination_mant_s2;
|
||||
wire [EXP_BITS-1:0] final_exp_s2;
|
||||
wire of_before_round_s2;
|
||||
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1),
|
||||
.DEPTH (LATENCY > 3)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
// Rounding and classification
|
||||
|
||||
|
||||
wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments
|
||||
wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position
|
||||
wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2;
|
||||
|
@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
wire is_itof_s3;
|
||||
wire is_signed_s3;
|
||||
fclass_t fclass_s3;
|
||||
fclass_t fclass_s3;
|
||||
wire mant_is_zero_s3;
|
||||
wire input_sign_s3;
|
||||
wire rounded_sign_s3;
|
||||
wire [INT_WIDTH-1:0] rounded_abs_s3;
|
||||
wire of_before_round_s3;
|
||||
wire of_before_round_s3;
|
||||
wire f2i_round_has_sticky_s3;
|
||||
wire i2f_round_has_sticky_s3;
|
||||
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
`UNUSED_VAR (fclass_s3)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1),
|
||||
.DEPTH (LATENCY > 4)
|
||||
.DEPTH (LATENCY > 3)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
.data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}),
|
||||
.data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3})
|
||||
);
|
||||
|
||||
|
||||
// Assemble regular result, nan box short ones. Int zeroes need to be detected
|
||||
wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]};
|
||||
|
||||
|
@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1
|
||||
f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
wire f2i_result_is_special_s3 = fclass_s3.is_nan
|
||||
| fclass_s3.is_inf
|
||||
| of_before_round_s3
|
||||
| (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3);
|
||||
|
||||
|
||||
fflags_t f2i_special_status_s3;
|
||||
fflags_t i2f_status_s3, f2i_status_s3;
|
||||
fflags_t tmp_fflags_s3;
|
||||
|
||||
|
||||
// All integer special cases are invalid
|
||||
assign f2i_special_status_s3 = {1'b1, 4'h0};
|
||||
|
||||
|
@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + `FP_FLAGS_BITS),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Modified port of noncomp module from fpnew Library
|
||||
// Modified port of noncomp module from fpnew Library
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
@ -19,9 +19,10 @@
|
|||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fncp_unit import VX_fpu_pkg::*; #(
|
||||
parameter LATENCY = 2,
|
||||
parameter LATENCY = 1,
|
||||
parameter EXP_BITS = 8,
|
||||
parameter MAN_BITS = 23
|
||||
parameter MAN_BITS = 23,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
input wire [31:0] dataa,
|
||||
input wire [31:0] datab,
|
||||
output wire [31:0] result,
|
||||
output wire [31:0] result,
|
||||
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags
|
||||
);
|
||||
);
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
|
@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
wire a_smaller, ab_equal;
|
||||
|
||||
// Setup
|
||||
assign a_sign = dataa[31];
|
||||
assign a_sign = dataa[31];
|
||||
assign a_exponent = dataa[30:23];
|
||||
assign a_mantissa = dataa[22:0];
|
||||
|
||||
assign b_sign = datab[31];
|
||||
assign b_sign = datab[31];
|
||||
assign b_exponent = datab[30:23];
|
||||
assign b_mantissa = datab[22:0];
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_a (
|
||||
|
@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
.clss_o (a_fclass)
|
||||
);
|
||||
|
||||
VX_fp_classifier #(
|
||||
VX_fp_classifier #(
|
||||
.EXP_BITS (EXP_BITS),
|
||||
.MAN_BITS (MAN_BITS)
|
||||
) fp_class_b (
|
||||
|
@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
assign a_smaller = (dataa < datab) ^ (a_sign || b_sign);
|
||||
assign ab_equal = (dataa == datab)
|
||||
assign ab_equal = (dataa == datab)
|
||||
|| (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0
|
||||
|
||||
// Pipeline stage0
|
||||
|
@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1),
|
||||
.DEPTH (LATENCY > 1)
|
||||
.DEPTH (LATENCY > 0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
|
||||
.data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_normal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_inf) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_zero) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_subnormal) begin
|
||||
fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
end
|
||||
else if (a_fclass_s0.is_nan) begin
|
||||
fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
end
|
||||
else begin
|
||||
fclass_mask_s0 = QUT_NAN;
|
||||
end
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
// Min/Max
|
||||
reg [31:0] fminmax_res_s0;
|
||||
always @(*) begin
|
||||
if (a_fclass_s0.is_nan && b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_fclass_s0.is_nan)
|
||||
else if (a_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = datab_s0;
|
||||
else if (b_fclass_s0.is_nan)
|
||||
else if (b_fclass_s0.is_nan)
|
||||
fminmax_res_s0 = dataa_s0;
|
||||
else begin
|
||||
else begin
|
||||
// FMIN, FMAX
|
||||
fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? dataa_s0 : datab_s0;
|
||||
end
|
||||
end
|
||||
|
||||
// Sign injection
|
||||
// Sign injection
|
||||
reg [31:0] fsgnj_res_s0; // result of sign injection
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
|
@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// Comparison
|
||||
// Comparison
|
||||
reg fcmp_res_s0; // result of comparison
|
||||
reg fcmp_fflags_NV_s0; // comparison fflags
|
||||
always @(*) begin
|
||||
case (op_mod_s0[1:0])
|
||||
0: begin // LE
|
||||
0: begin // LE
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = 1;
|
||||
|
@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end else begin
|
||||
fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0);
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
2: begin // EQ
|
||||
if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin
|
||||
fcmp_res_s0 = 0;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling;
|
||||
end else begin
|
||||
fcmp_res_s0 = ab_equal_s0;
|
||||
fcmp_fflags_NV_s0 = 0;
|
||||
|
@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
end
|
||||
default: begin
|
||||
fcmp_res_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
fcmp_fflags_NV_s0 = 'x;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
// FMV
|
||||
result_s0 = dataa_s0;
|
||||
fflags_NV_s0 = 0;
|
||||
end
|
||||
end
|
||||
6,7: begin
|
||||
// MIN/MAX
|
||||
result_s0 = fminmax_res_s0;
|
||||
|
@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (32 + 1),
|
||||
.DEPTH (LATENCY > 0)
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -88,7 +88,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
VX_fcvt_unit #(
|
||||
.LATENCY (`LATENCY_FCVT)
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) fcvt_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -68,7 +68,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -99,7 +99,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (2)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -93,7 +93,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_PES; ++i) begin
|
||||
VX_fncp_unit #(
|
||||
.LATENCY (`LATENCY_FNCP)
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
) fncp_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -62,7 +62,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0)
|
||||
.OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0)
|
||||
) pe_serializer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue