mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
critical path optimizations
This commit is contained in:
parent
b8fd2308e1
commit
a3a7239b4d
4 changed files with 287 additions and 60 deletions
|
@ -97,6 +97,7 @@ module VX_ibuffer #(
|
|||
reg [DATAW-1:0] deq_instr, deq_instr_n;
|
||||
reg [NWARPSW-1:0] num_warps;
|
||||
|
||||
// calculate valid table
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table;
|
||||
if (deq_fire) begin
|
||||
|
@ -113,11 +114,10 @@ module VX_ibuffer #(
|
|||
deq_valid_n = 1;
|
||||
deq_wid_n = 'x;
|
||||
deq_instr_n = 'x;
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
for (integer i = `NUM_WARPS-1; i >= 0; --i) begin
|
||||
if (schedule_table[i]) begin
|
||||
deq_wid_n = `NW_BITS'(i);
|
||||
deq_wid_n = `NW_BITS'(i);
|
||||
deq_instr_n = q_data_out[i];
|
||||
break;
|
||||
end
|
||||
end
|
||||
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
|
||||
|
@ -130,16 +130,16 @@ module VX_ibuffer #(
|
|||
deq_instr_n = q_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
// do round-robin with multiple active warps
|
||||
|
||||
// do round-robin scheduling with multiple active warps
|
||||
always @(*) begin
|
||||
schedule_table_n = schedule_table;
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
if (schedule_table[i]) begin
|
||||
schedule_table_n[i] = 0;
|
||||
break;
|
||||
end
|
||||
if (1 == $countones(schedule_table)
|
||||
|| (num_warps < 2)) begin
|
||||
schedule_table_n = valid_table_n;
|
||||
end else begin
|
||||
schedule_table_n = schedule_table;
|
||||
end
|
||||
schedule_table_n[deq_wid_n] = 0;
|
||||
end
|
||||
|
||||
wire warp_added = enq_fire && q_empty[ibuf_enq_if.wid];
|
||||
|
@ -148,21 +148,12 @@ module VX_ibuffer #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= 0;
|
||||
schedule_table <= 0;
|
||||
deq_valid <= 0;
|
||||
num_warps <= 0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
|
||||
if (0 == (| schedule_table_n)
|
||||
|| (num_warps < 2)) begin
|
||||
schedule_table <= valid_table_n;
|
||||
schedule_table[deq_wid_n] <= 0;
|
||||
end else begin
|
||||
schedule_table <= schedule_table_n;
|
||||
end
|
||||
|
||||
deq_valid <= deq_valid_n;
|
||||
valid_table <= valid_table_n;
|
||||
deq_valid <= deq_valid_n;
|
||||
schedule_table <= schedule_table_n;
|
||||
|
||||
if (warp_added && !warp_removed) begin
|
||||
num_warps <= num_warps + NWARPSW'(1);
|
||||
|
|
|
@ -12,18 +12,11 @@ module VX_scoreboard #(
|
|||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||
|
||||
reg is_reg_busy;
|
||||
always @(*) begin
|
||||
is_reg_busy = 'x;
|
||||
for (integer i = 0; i < `NUM_WARPS; ++i) begin
|
||||
if (ibuf_deq_if.wid == `NW_BITS'(i)) begin
|
||||
is_reg_busy = | (inuse_regs[i] & ibuf_deq_if.used_regs);
|
||||
end
|
||||
end
|
||||
end
|
||||
assign delay = is_reg_busy;
|
||||
|
||||
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
|
||||
wire [`NUM_REGS-1:0] deq_inuse_regs = inuse_regs[ibuf_deq_if.wid];
|
||||
|
||||
assign delay = | (deq_inuse_regs & ibuf_deq_if.used_regs);
|
||||
|
||||
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && ibuf_deq_if.wb;
|
||||
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
|
@ -43,8 +36,6 @@ module VX_scoreboard #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [`NUM_REGS-1:0] deq_inuse_regs = inuse_regs[ibuf_deq_if.wid];
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
module VX_onehot_encoder #(
|
||||
parameter N = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
parameter N = 1,
|
||||
parameter REVERSE = 0,
|
||||
parameter FAST = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [LN-1:0] data_out,
|
||||
|
@ -18,14 +20,24 @@ module VX_onehot_encoder #(
|
|||
|
||||
end else if (N == 2) begin
|
||||
|
||||
assign data_out = data_in[1];
|
||||
assign data_out = data_in[!REVERSE];
|
||||
assign valid = (| data_in);
|
||||
|
||||
end else begin
|
||||
|
||||
end else if (N == 4) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
|
||||
if (N == 4) begin
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
4'b1000: index_r = LN'(0);
|
||||
4'b?100: index_r = LN'(1);
|
||||
4'b??10: index_r = LN'(2);
|
||||
4'b???1: index_r = LN'(3);
|
||||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
4'b0001: index_r = LN'(0);
|
||||
|
@ -35,7 +47,30 @@ module VX_onehot_encoder #(
|
|||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else if (N == 8) begin
|
||||
end
|
||||
|
||||
assign data_out = index_r;
|
||||
assign valid = (| data_in);
|
||||
|
||||
end else if (N == 8) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
8'b10000000: index_r = LN'(0);
|
||||
8'b?1000000: index_r = LN'(1);
|
||||
8'b??100000: index_r = LN'(2);
|
||||
8'b???10000: index_r = LN'(3);
|
||||
8'b????1000: index_r = LN'(4);
|
||||
8'b?????100: index_r = LN'(5);
|
||||
8'b??????10: index_r = LN'(6);
|
||||
8'b???????1: index_r = LN'(7);
|
||||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
8'b00000001: index_r = LN'(0);
|
||||
|
@ -49,7 +84,38 @@ module VX_onehot_encoder #(
|
|||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else if (N == 16) begin
|
||||
end
|
||||
|
||||
assign data_out = index_r;
|
||||
assign valid = (| data_in);
|
||||
|
||||
end else if (N == 16) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
16'b1000000000000000: index_r = LN'(0);
|
||||
16'b?100000000000000: index_r = LN'(1);
|
||||
16'b??10000000000000: index_r = LN'(2);
|
||||
16'b???1000000000000: index_r = LN'(3);
|
||||
16'b????100000000000: index_r = LN'(4);
|
||||
16'b?????10000000000: index_r = LN'(5);
|
||||
16'b??????1000000000: index_r = LN'(6);
|
||||
16'b???????100000000: index_r = LN'(7);
|
||||
16'b????????10000000: index_r = LN'(8);
|
||||
16'b?????????1000000: index_r = LN'(9);
|
||||
16'b??????????100000: index_r = LN'(10);
|
||||
16'b???????????10000: index_r = LN'(11);
|
||||
16'b????????????1000: index_r = LN'(12);
|
||||
16'b?????????????100: index_r = LN'(13);
|
||||
16'b??????????????10: index_r = LN'(14);
|
||||
16'b???????????????1: index_r = LN'(15);
|
||||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
16'b0000000000000001: index_r = LN'(0);
|
||||
|
@ -71,7 +137,66 @@ module VX_onehot_encoder #(
|
|||
default: index_r = 'x;
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
end
|
||||
|
||||
assign data_out = index_r;
|
||||
assign valid = (| data_in);
|
||||
|
||||
end if (FAST) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
localparam levels_lp = $clog2(N);
|
||||
localparam aligned_width_lp = 1 << $clog2(N);
|
||||
|
||||
wire [levels_lp:0][aligned_width_lp-1:0] addr;
|
||||
wire [levels_lp:0][aligned_width_lp-1:0] v;
|
||||
|
||||
// base case, also handle padding for non-power of two inputs
|
||||
assign v[0] = REVERSE ? (data_in << (aligned_width_lp - N)) : ((aligned_width_lp)'(data_in));
|
||||
assign addr[0] = 'x;
|
||||
|
||||
for (genvar level = 1; level < levels_lp+1; level=level+1) begin
|
||||
localparam segments_lp = 2**(levels_lp-level);
|
||||
localparam segment_slot_lp = aligned_width_lp/segments_lp;
|
||||
localparam segment_width_lp = level; // how many bits are needed at each level
|
||||
|
||||
for (genvar segment = 0; segment < segments_lp; segment=segment+1) begin
|
||||
wire [1:0] vs = {
|
||||
v[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)],
|
||||
v[level-1][segment*segment_slot_lp]
|
||||
};
|
||||
|
||||
assign v[level][segment*segment_slot_lp] = (| vs);
|
||||
|
||||
if (level == 1) begin
|
||||
assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = vs[!REVERSE];
|
||||
end else begin
|
||||
assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = {
|
||||
vs[!REVERSE],
|
||||
addr[level-1][segment*segment_slot_lp+:segment_width_lp-1] | addr[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)+:segment_width_lp-1]
|
||||
};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = addr[levels_lp][`LOG2UP(N)-1:0];
|
||||
assign valid = v[levels_lp][0];
|
||||
`IGNORE_WARNINGS_END
|
||||
end else begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = `LOG2UP(N)'(i);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = 0; i < N; i++) begin
|
||||
|
@ -84,7 +209,6 @@ module VX_onehot_encoder #(
|
|||
|
||||
assign data_out = index_r;
|
||||
assign valid = (| data_in);
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -1,9 +1,10 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
module VX_priority_encoder #(
|
||||
parameter N = 1,
|
||||
parameter FAST = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
parameter N = 1,
|
||||
parameter REVERSE = 0,
|
||||
parameter FAST = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [N-1:0] onehot,
|
||||
|
@ -19,16 +20,26 @@ module VX_priority_encoder #(
|
|||
|
||||
end else if (N == 2) begin
|
||||
|
||||
assign onehot = {~data_in[0], data_in[0]};
|
||||
assign index = ~data_in[0];
|
||||
assign onehot = {~data_in[REVERSE], data_in[REVERSE]};
|
||||
assign index = ~data_in[REVERSE];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end else begin
|
||||
|
||||
end else if (N == 4) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
reg [N-1:0] onehot_r;
|
||||
|
||||
if (N == 4) begin
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
4'b1???: begin onehot_r = 4'b0001; index_r = LN'(0); end
|
||||
4'b01??: begin onehot_r = 4'b0010; index_r = LN'(1); end
|
||||
4'b001?: begin onehot_r = 4'b0100; index_r = LN'(2); end
|
||||
4'b0001: begin onehot_r = 4'b1000; index_r = LN'(3); end
|
||||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
4'b???1: begin onehot_r = 4'b0001; index_r = LN'(0); end
|
||||
|
@ -38,7 +49,31 @@ module VX_priority_encoder #(
|
|||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end else if (N == 8) begin
|
||||
end
|
||||
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
|
||||
end else if (N == 8) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
reg [N-1:0] onehot_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
8'b1???????: begin onehot_r = 8'b00000001; index_r = LN'(0); end
|
||||
8'b01??????: begin onehot_r = 8'b00000010; index_r = LN'(1); end
|
||||
8'b001?????: begin onehot_r = 8'b00000100; index_r = LN'(2); end
|
||||
8'b0001????: begin onehot_r = 8'b00001000; index_r = LN'(3); end
|
||||
8'b00001???: begin onehot_r = 8'b00010000; index_r = LN'(4); end
|
||||
8'b000001??: begin onehot_r = 8'b00100000; index_r = LN'(5); end
|
||||
8'b0000001?: begin onehot_r = 8'b01000000; index_r = LN'(6); end
|
||||
8'b00000001: begin onehot_r = 8'b10000000; index_r = LN'(7); end
|
||||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
8'b???????1: begin onehot_r = 8'b00000001; index_r = LN'(0); end
|
||||
|
@ -52,7 +87,39 @@ module VX_priority_encoder #(
|
|||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end else if (N == 16) begin
|
||||
end
|
||||
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
|
||||
end else if (N == 16) begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
reg [N-1:0] onehot_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
16'b1???????????????: begin onehot_r = 16'b0000000000000001; index_r = LN'(0); end
|
||||
16'b01??????????????: begin onehot_r = 16'b0000000000000010; index_r = LN'(1); end
|
||||
16'b001?????????????: begin onehot_r = 16'b0000000000000100; index_r = LN'(2); end
|
||||
16'b0001????????????: begin onehot_r = 16'b0000000000001000; index_r = LN'(3); end
|
||||
16'b00001???????????: begin onehot_r = 16'b0000000000010000; index_r = LN'(4); end
|
||||
16'b000001??????????: begin onehot_r = 16'b0000000000100000; index_r = LN'(5); end
|
||||
16'b0000001?????????: begin onehot_r = 16'b0000000001000000; index_r = LN'(6); end
|
||||
16'b00000001????????: begin onehot_r = 16'b0000000010000000; index_r = LN'(7); end
|
||||
16'b000000001???????: begin onehot_r = 16'b0000000100000000; index_r = LN'(8); end
|
||||
16'b0000000001??????: begin onehot_r = 16'b0000001000000000; index_r = LN'(9); end
|
||||
16'b00000000001?????: begin onehot_r = 16'b0000010000000000; index_r = LN'(10); end
|
||||
16'b000000000001????: begin onehot_r = 16'b0000100000000000; index_r = LN'(11); end
|
||||
16'b0000000000001???: begin onehot_r = 16'b0001000000000000; index_r = LN'(12); end
|
||||
16'b00000000000001??: begin onehot_r = 16'b0010000000000000; index_r = LN'(13); end
|
||||
16'b000000000000001?: begin onehot_r = 16'b0100000000000000; index_r = LN'(14); end
|
||||
16'b0000000000000001: begin onehot_r = 16'b1000000000000000; index_r = LN'(15); end
|
||||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
casez (data_in)
|
||||
16'b???????????????1: begin onehot_r = 16'b0000000000000001; index_r = LN'(0); end
|
||||
|
@ -74,6 +141,58 @@ module VX_priority_encoder #(
|
|||
default: begin onehot_r = 'x; index_r = 'x; end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
|
||||
end else if (FAST) begin
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
VX_scan #(
|
||||
.N (N),
|
||||
.OP (2),
|
||||
.REVERSE (REVERSE)
|
||||
) scan (
|
||||
.data_in (data_in),
|
||||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
if (REVERSE) begin
|
||||
assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])};
|
||||
assign valid_out = scan_lo[0];
|
||||
end else begin
|
||||
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
assign valid_out = scan_lo[N-1];
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N),
|
||||
.REVERSE (REVERSE)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
reg [N-1:0] onehot_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
onehot_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
onehot_r = 0;
|
||||
onehot_r[i] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
|
@ -86,11 +205,13 @@ module VX_priority_encoder #(
|
|||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
assign valid_out = (| data_in);
|
||||
assign onehot = onehot_r;
|
||||
|
||||
end
|
||||
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
endmodule
|
Loading…
Add table
Add a link
Reference in a new issue