mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
critical path optimizations
This commit is contained in:
parent
73d102afed
commit
feca2db24e
4 changed files with 49 additions and 50 deletions
12
hw/rtl/cache/VX_nc_bypass.v
vendored
12
hw/rtl/cache/VX_nc_bypass.v
vendored
|
@ -107,6 +107,7 @@ module VX_nc_bypass #(
|
|||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_tids;
|
||||
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire core_req_nc_valid;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
@ -115,12 +116,13 @@ module VX_nc_bypass #(
|
|||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
|
||||
|
||||
VX_lzc #(
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) core_req_sel (
|
||||
.in_i (core_req_valid_in_nc),
|
||||
.cnt_o (core_req_nc_tid),
|
||||
.valid_o (core_req_nc_valid)
|
||||
.data_in (core_req_valid_in_nc),
|
||||
.index (core_req_nc_tid),
|
||||
.onehot (core_req_nc_sel),
|
||||
.valid_out (core_req_nc_valid)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
|
||||
|
@ -143,7 +145,7 @@ module VX_nc_bypass #(
|
|||
if (NUM_REQS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
|
||||
(~mem_req_valid_in && mem_req_ready_out && (core_req_nc_tid == i)) : core_req_ready_out[i];
|
||||
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
|
||||
end
|
||||
end else begin
|
||||
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
|
||||
|
|
|
@ -180,39 +180,35 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
final_exp = input_exp_s1[i] + EXP_BIAS; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed(2**EXP_BITS-1-EXP_BIAS)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-MAN_BITS-EXP_BIAS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp) < $signed(1)) begin
|
||||
denorm_shamt = (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(1-EXP_BIAS)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
|
||||
denorm_shamt = (1-EXP_BIAS) - input_exp_s1[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
|
||||
|
@ -221,7 +217,7 @@ module VX_fp_cvt #(
|
|||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
denorm_shamt = MAX_INT_WIDTH+1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
|
|
|
@ -46,17 +46,17 @@ module VX_priority_encoder #(
|
|||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
VX_lzc #(
|
||||
.N (N)
|
||||
) lzc (
|
||||
.in_i (reversed),
|
||||
.cnt_o (index),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
assign valid_out = scan_lo[N-1];
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
end else if (MODEL == 2) begin
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
@ -66,30 +66,26 @@ module VX_priority_encoder #(
|
|||
assign higher_pri_regs[0] = 1'b0;
|
||||
assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0];
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_lzc #(
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
) lzc (
|
||||
.in_i (reversed),
|
||||
.cnt_o (index),
|
||||
.valid_o (valid_out)
|
||||
);
|
||||
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end else if (MODEL == 3) begin
|
||||
|
||||
assign onehot = reversed & ~(reversed-1);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
VX_lzc #(
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
) lzc (
|
||||
.in_i (reversed),
|
||||
.cnt_o (index),
|
||||
.valid_o (valid_out)
|
||||
);
|
||||
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
|
|
|
@ -25,6 +25,7 @@ module VX_stream_arbiter #(
|
|||
wire sel_valid;
|
||||
wire sel_ready;
|
||||
wire [LOG_NUM_REQS-1:0] sel_index;
|
||||
wire [NUM_REQS-1:0] sel_onehot;
|
||||
|
||||
wire [NUM_REQS-1:0] valid_in_any;
|
||||
wire [LANES-1:0] ready_in_sel;
|
||||
|
@ -42,13 +43,17 @@ module VX_stream_arbiter #(
|
|||
end
|
||||
|
||||
if (TYPE == "P") begin
|
||||
`UNUSED_VAR (sel_ready)
|
||||
VX_lzc #(
|
||||
.N (NUM_REQS)
|
||||
VX_fixed_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
) sel_arb (
|
||||
.in_i (valid_in_any),
|
||||
.cnt_o (sel_index),
|
||||
.valid_o (sel_valid)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_any),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_index),
|
||||
.grant_onehot (sel_onehot)
|
||||
);
|
||||
end else if (TYPE == "R") begin
|
||||
VX_rr_arbiter #(
|
||||
|
@ -61,7 +66,7 @@ module VX_stream_arbiter #(
|
|||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
.grant_onehot (sel_onehot)
|
||||
);
|
||||
end else if (TYPE == "F") begin
|
||||
VX_fair_arbiter #(
|
||||
|
@ -74,7 +79,7 @@ module VX_stream_arbiter #(
|
|||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
.grant_onehot (sel_onehot)
|
||||
);
|
||||
end else if (TYPE == "M") begin
|
||||
VX_matrix_arbiter #(
|
||||
|
@ -87,7 +92,7 @@ module VX_stream_arbiter #(
|
|||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
.grant_onehot (sel_onehot)
|
||||
);
|
||||
end else begin
|
||||
$error ("invalid parameter");
|
||||
|
@ -109,7 +114,7 @@ module VX_stream_arbiter #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in[i] = ready_in_sel & {LANES{(sel_index == LOG_NUM_REQS'(i))}};
|
||||
assign ready_in[i] = ready_in_sel & {LANES{sel_onehot[i]}};
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue