critical path optimizations

This commit is contained in:
Blaise Tine 2021-06-23 01:51:23 -07:00
parent b8fd2308e1
commit a3a7239b4d
4 changed files with 287 additions and 60 deletions

View file

@ -97,6 +97,7 @@ module VX_ibuffer #(
reg [DATAW-1:0] deq_instr, deq_instr_n;
reg [NWARPSW-1:0] num_warps;
// calculate valid table
always @(*) begin
valid_table_n = valid_table;
if (deq_fire) begin
@ -113,11 +114,10 @@ module VX_ibuffer #(
deq_valid_n = 1;
deq_wid_n = 'x;
deq_instr_n = 'x;
for (integer i = 0; i < `NUM_WARPS; i++) begin
for (integer i = `NUM_WARPS-1; i >= 0; --i) begin
if (schedule_table[i]) begin
deq_wid_n = `NW_BITS'(i);
deq_wid_n = `NW_BITS'(i);
deq_instr_n = q_data_out[i];
break;
end
end
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
@ -130,16 +130,16 @@ module VX_ibuffer #(
deq_instr_n = q_data_in;
end
end
// do round-robin with multiple active warps
// do round-robin scheduling with multiple active warps
always @(*) begin
schedule_table_n = schedule_table;
for (integer i = 0; i < `NUM_WARPS; i++) begin
if (schedule_table[i]) begin
schedule_table_n[i] = 0;
break;
end
if (1 == $countones(schedule_table)
|| (num_warps < 2)) begin
schedule_table_n = valid_table_n;
end else begin
schedule_table_n = schedule_table;
end
schedule_table_n[deq_wid_n] = 0;
end
wire warp_added = enq_fire && q_empty[ibuf_enq_if.wid];
@ -148,21 +148,12 @@ module VX_ibuffer #(
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
schedule_table <= 0;
deq_valid <= 0;
num_warps <= 0;
end else begin
valid_table <= valid_table_n;
if (0 == (| schedule_table_n)
|| (num_warps < 2)) begin
schedule_table <= valid_table_n;
schedule_table[deq_wid_n] <= 0;
end else begin
schedule_table <= schedule_table_n;
end
deq_valid <= deq_valid_n;
valid_table <= valid_table_n;
deq_valid <= deq_valid_n;
schedule_table <= schedule_table_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);

View file

@ -12,18 +12,11 @@ module VX_scoreboard #(
);
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs;
reg is_reg_busy;
always @(*) begin
is_reg_busy = 'x;
for (integer i = 0; i < `NUM_WARPS; ++i) begin
if (ibuf_deq_if.wid == `NW_BITS'(i)) begin
is_reg_busy = | (inuse_regs[i] & ibuf_deq_if.used_regs);
end
end
end
assign delay = is_reg_busy;
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
wire [`NUM_REGS-1:0] deq_inuse_regs = inuse_regs[ibuf_deq_if.wid];
assign delay = | (deq_inuse_regs & ibuf_deq_if.used_regs);
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && ibuf_deq_if.wb;
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
@ -43,8 +36,6 @@ module VX_scoreboard #(
end
end
wire [`NUM_REGS-1:0] deq_inuse_regs = inuse_regs[ibuf_deq_if.wid];
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin

View file

@ -4,8 +4,10 @@
// Adapted from BaseJump STL: http://bjump.org/data_out.html
module VX_onehot_encoder #(
parameter N = 1,
parameter LN = `LOG2UP(N)
parameter N = 1,
parameter REVERSE = 0,
parameter FAST = 1,
parameter LN = `LOG2UP(N)
) (
input wire [N-1:0] data_in,
output wire [LN-1:0] data_out,
@ -18,14 +20,24 @@ module VX_onehot_encoder #(
end else if (N == 2) begin
assign data_out = data_in[1];
assign data_out = data_in[!REVERSE];
assign valid = (| data_in);
end else begin
end else if (N == 4) begin
reg [LN-1:0] index_r;
if (N == 4) begin
if (REVERSE) begin
always @(*) begin
casez (data_in)
4'b1000: index_r = LN'(0);
4'b?100: index_r = LN'(1);
4'b??10: index_r = LN'(2);
4'b???1: index_r = LN'(3);
default: index_r = 'x;
endcase
end
end else begin
always @(*) begin
casez (data_in)
4'b0001: index_r = LN'(0);
@ -35,7 +47,30 @@ module VX_onehot_encoder #(
default: index_r = 'x;
endcase
end
end else if (N == 8) begin
end
assign data_out = index_r;
assign valid = (| data_in);
end else if (N == 8) begin
reg [LN-1:0] index_r;
if (REVERSE) begin
always @(*) begin
casez (data_in)
8'b10000000: index_r = LN'(0);
8'b?1000000: index_r = LN'(1);
8'b??100000: index_r = LN'(2);
8'b???10000: index_r = LN'(3);
8'b????1000: index_r = LN'(4);
8'b?????100: index_r = LN'(5);
8'b??????10: index_r = LN'(6);
8'b???????1: index_r = LN'(7);
default: index_r = 'x;
endcase
end
end else begin
always @(*) begin
casez (data_in)
8'b00000001: index_r = LN'(0);
@ -49,7 +84,38 @@ module VX_onehot_encoder #(
default: index_r = 'x;
endcase
end
end else if (N == 16) begin
end
assign data_out = index_r;
assign valid = (| data_in);
end else if (N == 16) begin
reg [LN-1:0] index_r;
if (REVERSE) begin
always @(*) begin
casez (data_in)
16'b1000000000000000: index_r = LN'(0);
16'b?100000000000000: index_r = LN'(1);
16'b??10000000000000: index_r = LN'(2);
16'b???1000000000000: index_r = LN'(3);
16'b????100000000000: index_r = LN'(4);
16'b?????10000000000: index_r = LN'(5);
16'b??????1000000000: index_r = LN'(6);
16'b???????100000000: index_r = LN'(7);
16'b????????10000000: index_r = LN'(8);
16'b?????????1000000: index_r = LN'(9);
16'b??????????100000: index_r = LN'(10);
16'b???????????10000: index_r = LN'(11);
16'b????????????1000: index_r = LN'(12);
16'b?????????????100: index_r = LN'(13);
16'b??????????????10: index_r = LN'(14);
16'b???????????????1: index_r = LN'(15);
default: index_r = 'x;
endcase
end
end else begin
always @(*) begin
casez (data_in)
16'b0000000000000001: index_r = LN'(0);
@ -71,7 +137,66 @@ module VX_onehot_encoder #(
default: index_r = 'x;
endcase
end
end else begin
end
assign data_out = index_r;
assign valid = (| data_in);
end if (FAST) begin
`IGNORE_WARNINGS_BEGIN
localparam levels_lp = $clog2(N);
localparam aligned_width_lp = 1 << $clog2(N);
wire [levels_lp:0][aligned_width_lp-1:0] addr;
wire [levels_lp:0][aligned_width_lp-1:0] v;
// base case, also handle padding for non-power of two inputs
assign v[0] = REVERSE ? (data_in << (aligned_width_lp - N)) : ((aligned_width_lp)'(data_in));
assign addr[0] = 'x;
for (genvar level = 1; level < levels_lp+1; level=level+1) begin
localparam segments_lp = 2**(levels_lp-level);
localparam segment_slot_lp = aligned_width_lp/segments_lp;
localparam segment_width_lp = level; // how many bits are needed at each level
for (genvar segment = 0; segment < segments_lp; segment=segment+1) begin
wire [1:0] vs = {
v[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)],
v[level-1][segment*segment_slot_lp]
};
assign v[level][segment*segment_slot_lp] = (| vs);
if (level == 1) begin
assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = vs[!REVERSE];
end else begin
assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = {
vs[!REVERSE],
addr[level-1][segment*segment_slot_lp+:segment_width_lp-1] | addr[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)+:segment_width_lp-1]
};
end
end
end
assign data_out = addr[levels_lp][`LOG2UP(N)-1:0];
assign valid = v[levels_lp][0];
`IGNORE_WARNINGS_END
end else begin
reg [LN-1:0] index_r;
if (REVERSE) begin
always @(*) begin
index_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin
index_r = `LOG2UP(N)'(i);
end
end
end
end else begin
always @(*) begin
index_r = 'x;
for (integer i = 0; i < N; i++) begin
@ -84,7 +209,6 @@ module VX_onehot_encoder #(
assign data_out = index_r;
assign valid = (| data_in);
end
endmodule

View file

@ -1,9 +1,10 @@
`include "VX_platform.vh"
module VX_priority_encoder #(
parameter N = 1,
parameter FAST = 1,
parameter LN = `LOG2UP(N)
parameter N = 1,
parameter REVERSE = 0,
parameter FAST = 1,
parameter LN = `LOG2UP(N)
) (
input wire [N-1:0] data_in,
output wire [N-1:0] onehot,
@ -19,16 +20,26 @@ module VX_priority_encoder #(
end else if (N == 2) begin
assign onehot = {~data_in[0], data_in[0]};
assign index = ~data_in[0];
assign onehot = {~data_in[REVERSE], data_in[REVERSE]};
assign index = ~data_in[REVERSE];
assign valid_out = (| data_in);
end else begin
end else if (N == 4) begin
reg [LN-1:0] index_r;
reg [N-1:0] onehot_r;
if (N == 4) begin
if (REVERSE) begin
always @(*) begin
casez (data_in)
4'b1???: begin onehot_r = 4'b0001; index_r = LN'(0); end
4'b01??: begin onehot_r = 4'b0010; index_r = LN'(1); end
4'b001?: begin onehot_r = 4'b0100; index_r = LN'(2); end
4'b0001: begin onehot_r = 4'b1000; index_r = LN'(3); end
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end else begin
always @(*) begin
casez (data_in)
4'b???1: begin onehot_r = 4'b0001; index_r = LN'(0); end
@ -38,7 +49,31 @@ module VX_priority_encoder #(
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end else if (N == 8) begin
end
assign index = index_r;
assign onehot = onehot_r;
end else if (N == 8) begin
reg [LN-1:0] index_r;
reg [N-1:0] onehot_r;
if (REVERSE) begin
always @(*) begin
casez (data_in)
8'b1???????: begin onehot_r = 8'b00000001; index_r = LN'(0); end
8'b01??????: begin onehot_r = 8'b00000010; index_r = LN'(1); end
8'b001?????: begin onehot_r = 8'b00000100; index_r = LN'(2); end
8'b0001????: begin onehot_r = 8'b00001000; index_r = LN'(3); end
8'b00001???: begin onehot_r = 8'b00010000; index_r = LN'(4); end
8'b000001??: begin onehot_r = 8'b00100000; index_r = LN'(5); end
8'b0000001?: begin onehot_r = 8'b01000000; index_r = LN'(6); end
8'b00000001: begin onehot_r = 8'b10000000; index_r = LN'(7); end
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end else begin
always @(*) begin
casez (data_in)
8'b???????1: begin onehot_r = 8'b00000001; index_r = LN'(0); end
@ -52,7 +87,39 @@ module VX_priority_encoder #(
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end else if (N == 16) begin
end
assign index = index_r;
assign onehot = onehot_r;
end else if (N == 16) begin
reg [LN-1:0] index_r;
reg [N-1:0] onehot_r;
if (REVERSE) begin
always @(*) begin
casez (data_in)
16'b1???????????????: begin onehot_r = 16'b0000000000000001; index_r = LN'(0); end
16'b01??????????????: begin onehot_r = 16'b0000000000000010; index_r = LN'(1); end
16'b001?????????????: begin onehot_r = 16'b0000000000000100; index_r = LN'(2); end
16'b0001????????????: begin onehot_r = 16'b0000000000001000; index_r = LN'(3); end
16'b00001???????????: begin onehot_r = 16'b0000000000010000; index_r = LN'(4); end
16'b000001??????????: begin onehot_r = 16'b0000000000100000; index_r = LN'(5); end
16'b0000001?????????: begin onehot_r = 16'b0000000001000000; index_r = LN'(6); end
16'b00000001????????: begin onehot_r = 16'b0000000010000000; index_r = LN'(7); end
16'b000000001???????: begin onehot_r = 16'b0000000100000000; index_r = LN'(8); end
16'b0000000001??????: begin onehot_r = 16'b0000001000000000; index_r = LN'(9); end
16'b00000000001?????: begin onehot_r = 16'b0000010000000000; index_r = LN'(10); end
16'b000000000001????: begin onehot_r = 16'b0000100000000000; index_r = LN'(11); end
16'b0000000000001???: begin onehot_r = 16'b0001000000000000; index_r = LN'(12); end
16'b00000000000001??: begin onehot_r = 16'b0010000000000000; index_r = LN'(13); end
16'b000000000000001?: begin onehot_r = 16'b0100000000000000; index_r = LN'(14); end
16'b0000000000000001: begin onehot_r = 16'b1000000000000000; index_r = LN'(15); end
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end else begin
always @(*) begin
casez (data_in)
16'b???????????????1: begin onehot_r = 16'b0000000000000001; index_r = LN'(0); end
@ -74,6 +141,58 @@ module VX_priority_encoder #(
default: begin onehot_r = 'x; index_r = 'x; end
endcase
end
end
assign index = index_r;
assign onehot = onehot_r;
end else if (FAST) begin
wire [N-1:0] scan_lo;
VX_scan #(
.N (N),
.OP (2),
.REVERSE (REVERSE)
) scan (
.data_in (data_in),
.data_out (scan_lo)
);
if (REVERSE) begin
assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])};
assign valid_out = scan_lo[0];
end else begin
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
assign valid_out = scan_lo[N-1];
end
VX_onehot_encoder #(
.N (N),
.REVERSE (REVERSE)
) onehot_encoder (
.data_in (onehot),
.data_out (index),
`UNUSED_PIN (valid)
);
end else begin
reg [LN-1:0] index_r;
reg [N-1:0] onehot_r;
if (REVERSE) begin
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
end
end
end
end else begin
always @(*) begin
index_r = 'x;
@ -86,11 +205,13 @@ module VX_priority_encoder #(
end
end
end
end
end
assign index = index_r;
assign onehot = onehot_r;
assign valid_out = (| data_in);
assign onehot = onehot_r;
end
assign valid_out = (| data_in);
endmodule