bus arbiters refactoring

This commit is contained in:
Blaise Tine 2021-07-30 16:00:09 -07:00
parent b6596494ff
commit 3d19588e57
5 changed files with 330 additions and 146 deletions

View file

@ -5,13 +5,14 @@ module VX_mem_arb #(
parameter DATA_WIDTH = 1,
parameter ADDR_WIDTH = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_SEL_IDX = 0,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
parameter TYPE = "R",
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
localparam DATA_SIZE = (DATA_WIDTH / 8),
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
) (
input wire clk,
input wire reset,
@ -52,8 +53,21 @@ module VX_mem_arb #(
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign req_data_in_merged[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
wire [TAG_OUT_WIDTH-1:0] req_tag_in_w;
VX_bits_insert #(
.N (TAG_IN_WIDTH),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
) bits_insert (
.data_in (req_tag_in[i]),
.sel_in (LOG_NUM_REQS'(i)),
.data_out (req_tag_in_w)
);
assign req_data_in_merged[i] = {req_tag_in_w, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #(
@ -74,12 +88,20 @@ module VX_mem_arb #(
///////////////////////////////////////////////////////////////////////
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
end
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS];
wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w;
VX_bits_remove #(
.N (TAG_OUT_WIDTH),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
) bits_remove (
.data_in (rsp_tag_in),
.data_out (rsp_tag_in_w)
);
VX_stream_demux #(
.NUM_REQS (NUM_REQS),
@ -88,14 +110,18 @@ module VX_mem_arb #(
) rsp_demux (
.clk (clk),
.reset (reset),
.sel (rsp_sel),
.sel_in (rsp_sel),
.valid_in (rsp_valid_in),
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
.data_in ({rsp_tag_in_w, rsp_data_in}),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out (rsp_data_out_merged),
.ready_out (rsp_ready_out)
);
);
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i];
end
end else begin

View file

@ -197,17 +197,49 @@ module VX_mem_unit # (
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
) smem_rsp_if();
VX_smem_arb smem_arb (
VX_smem_arb #(
.NUM_REQS (2),
.LANES (`NUM_THREADS),
.DATA_SIZE (4),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TYPE ("X"),
.BUFFERED_REQ (2),
.BUFFERED_RSP (1)
) smem_arb (
.clk (clk),
.reset (reset),
.core_req_if (dcache_req_if),
.cache_req_if (dcache_req_tmp_if),
.smem_req_if (smem_req_if),
// input request
.req_valid_in (dcache_req_if.valid),
.req_rw_in (dcache_req_if.rw),
.req_byteen_in (dcache_req_if.byteen),
.req_addr_in (dcache_req_if.addr),
.req_data_in (dcache_req_if.data),
.req_tag_in (dcache_req_if.tag),
.req_ready_in (dcache_req_if.ready),
// output requests
.req_valid_out ({smem_req_if.valid, dcache_req_tmp_if.valid}),
.req_rw_out ({smem_req_if.rw, dcache_req_tmp_if.rw}),
.req_byteen_out ({smem_req_if.byteen, dcache_req_tmp_if.byteen}),
.req_addr_out ({smem_req_if.addr, dcache_req_tmp_if.addr}),
.req_data_out ({smem_req_if.data, dcache_req_tmp_if.data}),
.req_tag_out ({smem_req_if.tag, dcache_req_tmp_if.tag}),
.req_ready_out ({smem_req_if.ready, dcache_req_tmp_if.ready}),
// input responses
.rsp_valid_in ({smem_rsp_if.valid, dcache_rsp_tmp_if.valid}),
.rsp_tmask_in ({smem_rsp_if.tmask, dcache_rsp_tmp_if.tmask}),
.rsp_data_in ({smem_rsp_if.data, dcache_rsp_tmp_if.data}),
.rsp_tag_in ({smem_rsp_if.tag, dcache_rsp_tmp_if.tag}),
.rsp_ready_in ({smem_rsp_if.ready, dcache_rsp_tmp_if.ready}),
.cache_rsp_if (dcache_rsp_tmp_if),
.smem_rsp_if (smem_rsp_if),
.core_rsp_if (dcache_rsp_if)
// output response
.rsp_valid_out (dcache_rsp_if.valid),
.rsp_tmask_out (dcache_rsp_if.tmask),
.rsp_tag_out (dcache_rsp_if.tag),
.rsp_data_out (dcache_rsp_if.data),
.rsp_ready_out (dcache_rsp_if.ready)
);
`RESET_RELAY (smem_reset);

View file

@ -1,73 +1,160 @@
`include "VX_define.vh"
// VX_smem_arb
//
// Request side: for every lane, a LOG_NUM_REQS-bit selector is read from the
// request tag at bit offset TAG_SEL_IDX and used as the select input of a
// VX_stream_demux that steers the lane's request to one of NUM_REQS output
// ports. Response side: a VX_stream_arbiter picks one of the NUM_REQS response
// ports and forwards it to the single response output; the port index is
// passed as `sel_in` to VX_bits_insert when rebuilding the response tag.
//
// NOTE(review): this listing appears to interleave the pre-refactor and
// post-refactor text of the module (two module headers, interface ports next
// to flat ports, duplicated parameter overrides). Comments below describe
// the parameterized (flat-port) variant; lines that look like pre-refactor
// leftovers are marked as such - confirm against the actual file.
module VX_smem_arb (
input wire clk,
input wire reset,
module VX_smem_arb #(
// NUM_REQS     : number of request output ports / response input ports.
// LANES        : number of lanes carried by each request port.
// DATA_SIZE    : word size in bytes (DATA_WIDTH is derived as 8 * DATA_SIZE).
// TAG_IN_WIDTH : width of the input request tag and of the output response tag.
// TAG_SEL_IDX  : bit offset of the port selector field inside that tag.
// BUFFERED_REQ : forwarded to VX_stream_demux.BUFFERED.
// BUFFERED_RSP : forwarded to VX_stream_arbiter.BUFFERED.
// TYPE         : forwarded to VX_stream_arbiter.TYPE.
parameter NUM_REQS = 1,
parameter LANES = 1,
parameter DATA_SIZE = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_SEL_IDX = 0,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
parameter TYPE = "R",
// Derived widths. TAG_OUT_WIDTH is narrower than TAG_IN_WIDTH by the
// selector field (LOG_NUM_REQS bits).
localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
localparam DATA_WIDTH = (8 * DATA_SIZE),
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
) (
input wire clk,
input wire reset,
// input request
VX_dcache_req_if core_req_if,
// Per-lane request inputs; req_ready_in is the per-lane ready driven back
// to the requester.
input wire [LANES-1:0] req_valid_in,
input wire [LANES-1:0] req_rw_in,
input wire [LANES-1:0][DATA_SIZE-1:0] req_byteen_in,
input wire [LANES-1:0][ADDR_WIDTH-1:0] req_addr_in,
input wire [LANES-1:0][DATA_WIDTH-1:0] req_data_in,
input wire [LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
output wire [LANES-1:0] req_ready_in,
// output requests
VX_dcache_req_if cache_req_if,
VX_dcache_req_if smem_req_if,
// output requests
// One [LANES] request bundle per output port; tags are TAG_OUT_WIDTH wide.
output wire [NUM_REQS-1:0][LANES-1:0] req_valid_out,
output wire [NUM_REQS-1:0][LANES-1:0] req_rw_out,
output wire [NUM_REQS-1:0][LANES-1:0][DATA_SIZE-1:0] req_byteen_out,
output wire [NUM_REQS-1:0][LANES-1:0][ADDR_WIDTH-1:0] req_addr_out,
output wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] req_data_out,
output wire [NUM_REQS-1:0][LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_out,
input wire [NUM_REQS-1:0][LANES-1:0] req_ready_out,
// input responses
VX_dcache_rsp_if cache_rsp_if,
VX_dcache_rsp_if smem_rsp_if,
// One response per port: a single valid/ready/tag plus a LANES-wide
// thread mask and LANES data words.
input wire [NUM_REQS-1:0] rsp_valid_in,
input wire [NUM_REQS-1:0][LANES-1:0] rsp_tmask_in,
input wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0] rsp_data_in,
input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] rsp_tag_in,
output wire [NUM_REQS-1:0] rsp_ready_in,
// output response
VX_dcache_rsp_if core_rsp_if
);
// NOTE(review): the two localparams below are written in terms of global
// macros and are redefined a few lines further down in terms of the module
// parameters; they look like the pre-refactor definitions.
localparam REQ_DATAW = `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + (`DCORE_TAG_WIDTH-1);
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
output wire rsp_valid_out,
output wire [LANES-1:0] rsp_tmask_out,
output wire [LANES-1:0][DATA_WIDTH-1:0] rsp_data_out,
output wire [TAG_IN_WIDTH-1:0] rsp_tag_out,
input wire rsp_ready_out
);
// Packed payload widths: REQ_DATAW matches the per-lane concatenation
// {tag, addr, rw, byteen, data}; RSP_DATAW matches {tag, tmask, data}
// for one whole response.
localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = LANES * (1 + DATA_WIDTH) + TAG_IN_WIDTH;
//
// handle requests
//
if (NUM_REQS > 1) begin
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [LANES-1:0][REQ_DATAW-1:0] req_data_in_merged;
wire [NUM_REQS-1:0][LANES-1:0][REQ_DATAW-1:0] req_data_out_merged;
wire [LANES-1:0][LOG_NUM_REQS-1:0] req_sel;
wire [LANES-1:0][TAG_OUT_WIDTH-1:0] req_tag_in_w;
for (genvar i = 0; i < LANES; ++i) begin
// Selector = LOG_NUM_REQS bits of the lane's tag starting at TAG_SEL_IDX.
assign req_sel[i] = req_tag_in[i][TAG_SEL_IDX +: LOG_NUM_REQS];
// Produces the TAG_OUT_WIDTH-bit tag from the TAG_IN_WIDTH-bit one;
// presumably by dropping the S selector bits at POS - confirm against
// VX_bits_remove, which is not visible here.
VX_bits_remove #(
.N (TAG_IN_WIDTH),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
) bits_remove (
.data_in (req_tag_in[i]),
.data_out (req_tag_in_w[i])
);
// Pack all request fields of lane i into a single demux payload word.
assign req_data_in_merged[i] = {req_tag_in_w[i], req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
wire [1:0][REQ_DATAW-1:0] req_data_out;
// NOTE(review): the instantiation below lists .NUM_REQS and .BUFFERED twice
// and connects both `core_req_if.*` and the flat `req_*` signals; the
// hard-coded / interface-based lines look like pre-refactor text.
VX_stream_demux #(
.NUM_REQS (2),
.NUM_REQS (NUM_REQS),
.LANES (LANES),
.DATAW (REQ_DATAW),
.BUFFERED (2)
.BUFFERED (BUFFERED_REQ)
) req_demux (
.clk (clk),
.reset (reset),
.sel (core_req_if.tag[i][0]),
.valid_in (core_req_if.valid[i]),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}),
.ready_in (core_req_if.ready[i]),
.valid_out ({smem_req_if.valid[i], cache_req_if.valid[i]}),
.data_out (req_data_out),
.ready_out ({smem_req_if.ready[i], cache_req_if.ready[i]})
);
.sel_in (req_sel),
.valid_in (req_valid_in),
.data_in (req_data_in_merged),
.ready_in (req_ready_in),
.valid_out (req_valid_out),
.data_out (req_data_out_merged),
.ready_out (req_ready_out)
);
// Unpack each demuxed payload word back into the per-port, per-lane
// request fields (same field order as the packing above).
for (genvar i = 0; i < NUM_REQS; i++) begin
for (genvar j = 0; j < LANES; ++j) begin
assign {req_tag_out[i][j], req_addr_out[i][j], req_rw_out[i][j], req_byteen_out[i][j], req_data_out[i][j]} = req_data_out_merged[i][j];
end
end
///////////////////////////////////////////////////////////////////////
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in_merged;
for (genvar i = 0; i < NUM_REQS; i++) begin
wire [TAG_IN_WIDTH-1:0] rsp_tag_in_w;
// Widens the port's TAG_OUT_WIDTH-bit response tag to TAG_IN_WIDTH bits,
// supplying the port index i as `sel_in`; presumably it is inserted at
// bit POS - confirm against VX_bits_insert, which is not visible here.
VX_bits_insert #(
.N (TAG_OUT_WIDTH),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
) bits_insert (
.data_in (rsp_tag_in[i]),
.sel_in (LOG_NUM_REQS'(i)),
.data_out (rsp_tag_in_w)
);
// Pack the whole response of port i into a single arbiter payload word.
assign rsp_data_in_merged[i] = {rsp_tag_in_w, rsp_tmask_in[i], rsp_data_in[i]};
end
// A response is arbitrated as one unit, hence LANES = 1 with the thread
// mask and all data words carried inside the RSP_DATAW payload.
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.LANES (1),
.DATAW (RSP_DATAW),
.BUFFERED (BUFFERED_RSP),
.TYPE (TYPE)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in_merged),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out ({rsp_tag_out, rsp_tmask_out, rsp_data_out}),
.ready_out (rsp_ready_out)
);
end else begin
// NUM_REQS <= 1: no steering or arbitration; every request and response
// signal is wired straight through.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign req_valid_out = req_valid_in;
assign req_tag_out = req_tag_in;
assign req_addr_out = req_addr_in;
assign req_rw_out = req_rw_in;
assign req_byteen_out = req_byteen_in;
assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out;
assign rsp_valid_out = rsp_valid_in;
assign rsp_tmask_out = rsp_tmask_in;
assign rsp_tag_out = rsp_tag_in;
assign rsp_data_out = rsp_data_in;
assign rsp_ready_in = rsp_ready_out;
// NOTE(review): the two assigns below index with `i` and read the 2-entry
// `req_data_out` wire declared next to the demux; they look like
// pre-refactor text rather than part of this pass-through branch.
assign {cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]} = req_data_out[0];
assign {smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]} = req_data_out[1];
end
//
// handle responses
//
// NOTE(review): interface-based response arbiter with hard-coded
// NUM_REQS = 2; it appends a constant 1'b1 (smem) / 1'b0 (cache) below the
// response tag. Looks like the pre-refactor counterpart of `rsp_arb` above.
VX_stream_arbiter #(
.NUM_REQS (2),
.DATAW (RSP_DATAW),
.TYPE ("X"),
.BUFFERED (1)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in ({smem_rsp_if.valid, cache_rsp_if.valid}),
.data_in ({{smem_rsp_if.tmask, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}},
{cache_rsp_if.tmask, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}}),
.ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}),
.valid_out (core_rsp_if.valid),
.data_out ({core_rsp_if.tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
endmodule

View file

@ -2,6 +2,7 @@
module VX_stream_arbiter #(
parameter NUM_REQS = 1,
parameter LANES = 1,
parameter DATAW = 1,
parameter TYPE = "R",
parameter LOCK_ENABLE = 1,
@ -10,21 +11,36 @@ module VX_stream_arbiter #(
input wire clk,
input wire reset,
input wire [NUM_REQS-1:0] valid_in,
input wire [NUM_REQS-1:0][DATAW-1:0] data_in,
output wire [NUM_REQS-1:0] ready_in,
input wire [NUM_REQS-1:0][LANES-1:0] valid_in,
input wire [NUM_REQS-1:0][LANES-1:0][DATAW-1:0] data_in,
output wire [NUM_REQS-1:0][LANES-1:0] ready_in,
output wire valid_out,
output wire [DATAW-1:0] data_out,
input wire ready_out
output wire [LANES-1:0] valid_out,
output wire [LANES-1:0][DATAW-1:0] data_out,
input wire [LANES-1:0] ready_out
);
localparam LOG_NUM_REQS = $clog2(NUM_REQS);
if (NUM_REQS > 1) begin
wire sel_valid;
wire sel_ready;
wire [NUM_REQS-1:0] sel_1hot;
wire sel_valid;
wire sel_ready;
wire [NUM_REQS-1:0] sel_1hot;
wire [NUM_REQS-1:0] valid_in_any;
wire [LANES-1:0] ready_in_sel;
if (LANES > 1) begin
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valid_in_any[i] = (| valid_in[i]);
end
assign sel_ready = (| ready_in_sel);
end else begin
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valid_in_any[i] = valid_in[i];
end
assign sel_ready = ready_in_sel;
end
if (TYPE == "X") begin
VX_fixed_arbiter #(
@ -33,7 +49,7 @@ module VX_stream_arbiter #(
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
@ -46,7 +62,7 @@ module VX_stream_arbiter #(
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
@ -59,7 +75,7 @@ module VX_stream_arbiter #(
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
@ -72,7 +88,7 @@ module VX_stream_arbiter #(
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
@ -82,34 +98,58 @@ module VX_stream_arbiter #(
$error ("invalid parameter");
end
wire [DATAW-1:0] data_in_sel;
wire [LANES-1:0] valid_in_sel;
wire [LANES-1:0][DATAW-1:0] data_in_sel;
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) data_in_mux (
.data_in (data_in),
.sel_in (sel_1hot),
.data_out (data_in_sel)
);
if (LANES > 1) begin
wire [NUM_REQS-1:0][(LANES * (1 + DATAW))-1:0] valid_data_in;
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (sel_valid),
.data_in (data_in_sel),
.ready_in (sel_ready),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valid_data_in[i] = {valid_in[i], data_in[i]};
end
VX_onehot_mux #(
.DATAW (LANES * (1 + DATAW)),
.N (NUM_REQS)
) data_in_mux (
.data_in (valid_data_in),
.sel_in (sel_1hot),
.data_out ({valid_in_sel, data_in_sel})
);
`UNUSED_VAR (sel_valid)
end else begin
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) data_in_mux (
.data_in (data_in),
.sel_in (sel_1hot),
.data_out (data_in_sel)
);
assign valid_in_sel = sel_valid;
end
for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = sel_1hot[i] && sel_ready;
assign ready_in[i] = ready_in_sel & {LANES{sel_1hot[i]}};
end
for (genvar i = 0; i < LANES; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in_sel[i]),
.data_in (data_in_sel[i]),
.ready_in (ready_in_sel[i]),
.valid_out (valid_out[i]),
.data_out (data_out[i]),
.ready_out (ready_out[i])
);
end
end else begin

View file

@ -2,6 +2,7 @@
module VX_stream_demux #(
parameter NUM_REQS = 1,
parameter LANES = 1,
parameter DATAW = 1,
parameter BUFFERED = 0,
localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS)
@ -9,60 +10,58 @@ module VX_stream_demux #(
input wire clk,
input wire reset,
input wire [LOG_NUM_REQS-1:0] sel,
input wire [LANES-1:0][LOG_NUM_REQS-1:0] sel_in,
input wire valid_in,
input wire [DATAW-1:0] data_in,
output wire ready_in,
input wire [LANES-1:0] valid_in,
input wire [LANES-1:0][DATAW-1:0] data_in,
output wire [LANES-1:0] ready_in,
output wire [NUM_REQS-1:0] valid_out,
output wire [NUM_REQS-1:0][DATAW-1:0] data_out,
input wire [NUM_REQS-1:0] ready_out
output wire [NUM_REQS-1:0][LANES-1:0] valid_out,
output wire [NUM_REQS-1:0][LANES-1:0][DATAW-1:0] data_out,
input wire [NUM_REQS-1:0][LANES-1:0] ready_out
);
if (NUM_REQS > 1) begin
reg [NUM_REQS-1:0] valid_out_unqual;
wire [NUM_REQS-1:0][DATAW-1:0] data_out_unqual;
wire [NUM_REQS-1:0] ready_out_unqual;
for (genvar j = 0; j < LANES; ++j) begin
always @(*) begin
valid_out_unqual = '0;
valid_out_unqual[sel] = valid_in;
end
for (genvar i = 0; i < NUM_REQS; i++) begin
assign data_out_unqual[i] = data_in;
end
assign ready_in = ready_out_unqual[sel];
reg [NUM_REQS-1:0] valid_in_sel;
wire [NUM_REQS-1:0] ready_in_sel;
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_out_unqual[i]),
.data_in (data_out_unqual[i]),
.ready_in (ready_out_unqual[i]),
.valid_out (valid_out[i]),
.data_out (data_out[i]),
.ready_out (ready_out[i])
);
always @(*) begin
valid_in_sel = '0;
valid_in_sel[sel_in[j]] = valid_in[j];
end
assign ready_in[j] = ready_in_sel[sel_in[j]];
for (genvar i = 0; i < NUM_REQS; i++)
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in_sel[i]),
.data_in (data_in[j]),
.ready_in (ready_in_sel[i]),
.valid_out (valid_out[i][j]),
.data_out (data_out[i][j]),
.ready_out (ready_out[i][j])
);
end
end
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (sel)
`UNUSED_VAR (sel_in)
assign valid_out = valid_in;
assign data_out = data_in;
assign ready_in = ready_out;
assign ready_in = ready_out;
end