fixed AXI adapter

This commit is contained in:
tinebp 2024-12-12 20:52:45 -08:00
parent f635d71ba4
commit 7975a5a38c
10 changed files with 418 additions and 250 deletions

View file

@ -324,9 +324,11 @@ config2()
# test memory ports
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=4" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=16
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=16
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=sgemmx --threads=8
echo "configuration-2 tests done!"
}

View file

@ -21,7 +21,7 @@ module Vortex import VX_gpu_pkg::*; (
input wire reset,
// Memory request
output wire mem_req_valid [`VX_MEM_PORTS-1:0],
output wire mem_req_valid [`VX_MEM_PORTS],
output wire mem_req_rw [`VX_MEM_PORTS],
output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`VX_MEM_PORTS],
output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`VX_MEM_PORTS],

View file

@ -15,7 +15,7 @@
module Vortex_axi import VX_gpu_pkg::*; #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
parameter AXI_NUM_BANKS = 1
)(
@ -88,18 +88,18 @@ module Vortex_axi import VX_gpu_pkg::*; #(
localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0);
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW;
wire mem_req_valid;
wire mem_req_rw;
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
wire mem_req_valid [`VX_MEM_PORTS];
wire mem_req_rw [`VX_MEM_PORTS];
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`VX_MEM_PORTS];
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`VX_MEM_PORTS];
wire mem_req_ready [`VX_MEM_PORTS];
wire mem_rsp_valid;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
wire mem_rsp_valid [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`VX_MEM_PORTS];
wire mem_rsp_ready [`VX_MEM_PORTS];
`SCOPE_IO_SWITCH (1);
@ -129,58 +129,61 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.busy (busy)
);
wire mem_req_valid_a;
wire mem_req_rw_a;
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a;
wire mem_req_ready_a;
wire mem_req_valid_a [`VX_MEM_PORTS];
wire mem_req_rw_a [`VX_MEM_PORTS];
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a [`VX_MEM_PORTS];
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a [`VX_MEM_PORTS];
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a [`VX_MEM_PORTS];
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a [`VX_MEM_PORTS];
wire mem_req_ready_a [`VX_MEM_PORTS];
wire mem_rsp_valid_a;
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a;
wire mem_rsp_ready_a;
wire mem_rsp_valid_a [`VX_MEM_PORTS];
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a [`VX_MEM_PORTS];
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a [`VX_MEM_PORTS];
wire mem_rsp_ready_a [`VX_MEM_PORTS];
VX_mem_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
.clk (clk),
.reset (reset),
// Adjust memory data width to match AXI interface
for (genvar i = 0; i < `VX_MEM_PORTS; i++) begin : g_mem_adapter
VX_mem_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (mem_req_valid),
.mem_req_addr_in (mem_req_addr),
.mem_req_rw_in (mem_req_rw),
.mem_req_byteen_in (mem_req_byteen),
.mem_req_data_in (mem_req_data),
.mem_req_tag_in (mem_req_tag),
.mem_req_ready_in (mem_req_ready),
.mem_req_valid_in (mem_req_valid[i]),
.mem_req_addr_in (mem_req_addr[i]),
.mem_req_rw_in (mem_req_rw[i]),
.mem_req_byteen_in (mem_req_byteen[i]),
.mem_req_data_in (mem_req_data[i]),
.mem_req_tag_in (mem_req_tag[i]),
.mem_req_ready_in (mem_req_ready[i]),
.mem_rsp_valid_in (mem_rsp_valid),
.mem_rsp_data_in (mem_rsp_data),
.mem_rsp_tag_in (mem_rsp_tag),
.mem_rsp_ready_in (mem_rsp_ready),
.mem_rsp_valid_in (mem_rsp_valid[i]),
.mem_rsp_data_in (mem_rsp_data[i]),
.mem_rsp_tag_in (mem_rsp_tag[i]),
.mem_rsp_ready_in (mem_rsp_ready[i]),
.mem_req_valid_out (mem_req_valid_a),
.mem_req_addr_out (mem_req_addr_a),
.mem_req_rw_out (mem_req_rw_a),
.mem_req_byteen_out (mem_req_byteen_a),
.mem_req_data_out (mem_req_data_a),
.mem_req_tag_out (mem_req_tag_a),
.mem_req_ready_out (mem_req_ready_a),
.mem_req_valid_out (mem_req_valid_a[i]),
.mem_req_addr_out (mem_req_addr_a[i]),
.mem_req_rw_out (mem_req_rw_a[i]),
.mem_req_byteen_out (mem_req_byteen_a[i]),
.mem_req_data_out (mem_req_data_a[i]),
.mem_req_tag_out (mem_req_tag_a[i]),
.mem_req_ready_out (mem_req_ready_a[i]),
.mem_rsp_valid_out (mem_rsp_valid_a),
.mem_rsp_data_out (mem_rsp_data_a),
.mem_rsp_tag_out (mem_rsp_tag_a),
.mem_rsp_ready_out (mem_rsp_ready_a)
);
.mem_rsp_valid_out (mem_rsp_valid_a[i]),
.mem_rsp_data_out (mem_rsp_data_a[i]),
.mem_rsp_tag_out (mem_rsp_tag_a[i]),
.mem_rsp_ready_out (mem_rsp_ready_a[i])
);
end
VX_axi_adapter #(
.DATA_WIDTH (AXI_DATA_WIDTH),
@ -188,8 +191,10 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.ADDR_WIDTH_OUT (AXI_ADDR_WIDTH),
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
// Override names must match VX_axi_adapter's declared parameters
// (NUM_PORTS_IN / NUM_PORTS_OUT / INTERLEAVE); a named override of a
// parameter that does not exist is an elaboration error.
.NUM_PORTS_IN   (`VX_MEM_PORTS),
.NUM_PORTS_OUT  (AXI_NUM_BANKS),
.INTERLEAVE     (0),
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
.RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),

View file

@ -611,8 +611,8 @@ module VX_cache_bank #(
end else begin : g_wt
wire [LINE_SIZE-1:0] line_byteen;
VX_demux #(
.N (`CS_WORD_SEL_BITS),
.M (WORD_SIZE)
.DATAW (WORD_SIZE),
.N (`CS_WORDS_PER_LINE)
) byteen_demux (
.sel_in (word_idx_st1),
.data_in (byteen_st1),

View file

@ -16,118 +16,171 @@
`TRACING_OFF
module VX_axi_adapter #(
parameter DATA_WIDTH = 512,
parameter ADDR_WIDTH_IN = 1,
parameter ADDR_WIDTH_OUT = 32,
parameter ADDR_WIDTH_IN = 26, // word-addressable
parameter ADDR_WIDTH_OUT = 32, // byte-addressable
parameter TAG_WIDTH_IN = 8,
parameter TAG_WIDTH_OUT = 8,
parameter NUM_BANKS = 1,
parameter BANK_INTERLEAVE= 0,
parameter NUM_PORTS_IN = 1,
parameter NUM_PORTS_OUT = 1,
parameter INTERLEAVE = 0,
parameter TAG_BUFFER_SIZE= 32,
parameter RSP_OUT_BUF = 0
) (
parameter ARBITER = "R",
parameter REQ_OUT_BUF = 1,
parameter RSP_OUT_BUF = 1,
parameter DATA_SIZE = DATA_WIDTH/8
) (
input wire clk,
input wire reset,
// Vortex request
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH_IN-1:0] mem_req_tag,
output wire mem_req_ready,
input wire mem_req_valid [NUM_PORTS_IN],
input wire mem_req_rw [NUM_PORTS_IN],
input wire [DATA_SIZE-1:0] mem_req_byteen [NUM_PORTS_IN],
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr [NUM_PORTS_IN],
input wire [DATA_WIDTH-1:0] mem_req_data [NUM_PORTS_IN],
input wire [TAG_WIDTH_IN-1:0] mem_req_tag [NUM_PORTS_IN],
output wire mem_req_ready [NUM_PORTS_IN],
// Vortex response
output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag,
input wire mem_rsp_ready,
output wire mem_rsp_valid [NUM_PORTS_IN],
output wire [DATA_WIDTH-1:0] mem_rsp_data [NUM_PORTS_IN],
output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag [NUM_PORTS_IN],
input wire mem_rsp_ready [NUM_PORTS_IN],
// AXI write request address channel
output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS],
output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS],
output wire [7:0] m_axi_awlen [NUM_BANKS],
output wire [2:0] m_axi_awsize [NUM_BANKS],
output wire [1:0] m_axi_awburst [NUM_BANKS],
output wire [1:0] m_axi_awlock [NUM_BANKS],
output wire [3:0] m_axi_awcache [NUM_BANKS],
output wire [2:0] m_axi_awprot [NUM_BANKS],
output wire [3:0] m_axi_awqos [NUM_BANKS],
output wire [3:0] m_axi_awregion [NUM_BANKS],
output wire m_axi_awvalid [NUM_PORTS_OUT],
input wire m_axi_awready [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_PORTS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_PORTS_OUT],
output wire [7:0] m_axi_awlen [NUM_PORTS_OUT],
output wire [2:0] m_axi_awsize [NUM_PORTS_OUT],
output wire [1:0] m_axi_awburst [NUM_PORTS_OUT],
output wire [1:0] m_axi_awlock [NUM_PORTS_OUT],
output wire [3:0] m_axi_awcache [NUM_PORTS_OUT],
output wire [2:0] m_axi_awprot [NUM_PORTS_OUT],
output wire [3:0] m_axi_awqos [NUM_PORTS_OUT],
output wire [3:0] m_axi_awregion [NUM_PORTS_OUT],
// AXI write request data channel
output wire m_axi_wvalid [NUM_BANKS],
input wire m_axi_wready [NUM_BANKS],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS],
output wire [DATA_WIDTH/8-1:0] m_axi_wstrb [NUM_BANKS],
output wire m_axi_wlast [NUM_BANKS],
output wire m_axi_wvalid [NUM_PORTS_OUT],
input wire m_axi_wready [NUM_PORTS_OUT],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_PORTS_OUT],
output wire [DATA_SIZE-1:0] m_axi_wstrb [NUM_PORTS_OUT],
output wire m_axi_wlast [NUM_PORTS_OUT],
// AXI write response channel
input wire m_axi_bvalid [NUM_BANKS],
output wire m_axi_bready [NUM_BANKS],
input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS],
input wire [1:0] m_axi_bresp [NUM_BANKS],
input wire m_axi_bvalid [NUM_PORTS_OUT],
output wire m_axi_bready [NUM_PORTS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_PORTS_OUT],
input wire [1:0] m_axi_bresp [NUM_PORTS_OUT],
// AXI read address channel
output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS],
output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS],
output wire [1:0] m_axi_arburst [NUM_BANKS],
output wire [1:0] m_axi_arlock [NUM_BANKS],
output wire [3:0] m_axi_arcache [NUM_BANKS],
output wire [2:0] m_axi_arprot [NUM_BANKS],
output wire [3:0] m_axi_arqos [NUM_BANKS],
output wire [3:0] m_axi_arregion [NUM_BANKS],
output wire m_axi_arvalid [NUM_PORTS_OUT],
input wire m_axi_arready [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_PORTS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_PORTS_OUT],
output wire [7:0] m_axi_arlen [NUM_PORTS_OUT],
output wire [2:0] m_axi_arsize [NUM_PORTS_OUT],
output wire [1:0] m_axi_arburst [NUM_PORTS_OUT],
output wire [1:0] m_axi_arlock [NUM_PORTS_OUT],
output wire [3:0] m_axi_arcache [NUM_PORTS_OUT],
output wire [2:0] m_axi_arprot [NUM_PORTS_OUT],
output wire [3:0] m_axi_arqos [NUM_PORTS_OUT],
output wire [3:0] m_axi_arregion [NUM_PORTS_OUT],
// AXI read response channel
input wire m_axi_rvalid [NUM_BANKS],
output wire m_axi_rready [NUM_BANKS],
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS],
input wire m_axi_rlast [NUM_BANKS],
input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_BANKS],
input wire [1:0] m_axi_rresp [NUM_BANKS]
input wire m_axi_rvalid [NUM_PORTS_OUT],
output wire m_axi_rready [NUM_PORTS_OUT],
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_PORTS_OUT],
input wire m_axi_rlast [NUM_PORTS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_PORTS_OUT],
input wire [1:0] m_axi_rresp [NUM_PORTS_OUT]
);
localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
// Derived widths used for address splitting and tag handling.
// log2 of the data bus width in bytes: number of byte-offset bits in an output address
localparam LOG2_DATA_SIZE = `CLOG2(DATA_SIZE);
// bits needed to select one of the NUM_PORTS_OUT output ports (0 when there is a single port)
localparam PORT_SEL_BITS = `CLOG2(NUM_PORTS_OUT);
localparam PORT_SEL_WIDTH = `UP(PORT_SEL_BITS);
// Output byte address reduced to word granularity, then extended by the port-select bits:
// the width of the word-addressable input address space covered by all output ports.
localparam DST_ADDR_WDITH = (ADDR_WIDTH_OUT - LOG2_DATA_SIZE) + PORT_SEL_BITS; // convert output address to word-addressable input space
// word offset within a single output port
localparam PORT_OFFSETW = DST_ADDR_WDITH - PORT_SEL_BITS;
// bits needed to identify the originating input port
localparam NUM_PORTS_IN_BITS = `CLOG2(NUM_PORTS_IN);
localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
localparam TAG_BUFFER_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
// tag bits required to carry the full input tag plus the input-port index
localparam NEEDED_TAG_WIDTH = TAG_WIDTH_IN + NUM_PORTS_IN_BITS;
// when the full tag does not fit in TAG_WIDTH_OUT, reads carry a tag-buffer index instead of the input tag
localparam RD_TAG_WIDTH = (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) ? TAG_BUFFER_ADDRW : TAG_WIDTH_IN;
// NOTE(review): m_axi_arid is built as {read tag, input-port index}, i.e. RD_TAG_WIDTH + NUM_PORTS_IN_BITS;
// confirm that adding PORT_SEL_BITS (output-port select) here is intended.
localparam RD_FULL_TAG_WIDTH = RD_TAG_WIDTH + PORT_SEL_BITS;
// minimum output tag width checked by the static assertion on TAG_WIDTH_OUT
localparam DST_TAG_WIDTH = `MAX(RD_FULL_TAG_WIDTH, TAG_WIDTH_IN);
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
`STATIC_ASSERT ((TAG_WIDTH_OUT >= DST_TAG_WIDTH), ("invalid output tag width: current=%0d, expected=%0d", TAG_WIDTH_OUT, DST_TAG_WIDTH))
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
// PORT selection
wire [NUM_PORTS_IN-1:0][PORT_SEL_WIDTH-1:0] req_port_out_sel;
wire [NUM_PORTS_IN-1:0][PORT_OFFSETW-1:0] req_port_out_off;
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
// Split every input request address into an output-port selector
// (req_port_out_sel) and a word offset inside that port (req_port_out_off).
if (NUM_PORTS_OUT > 1) begin : g_port_sel
    for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
        // zero-extend/truncate the input word address to the adapter's address space
        wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr[i]);
        // The module parameter is named INTERLEAVE; no PORT_INTERLEAVE is declared.
        if (INTERLEAVE) begin : g_interleave
            // interleaved: low address bits pick the port, the remaining bits are the offset
            assign req_port_out_sel[i] = mem_req_addr_out[PORT_SEL_BITS-1:0];
            assign req_port_out_off[i] = mem_req_addr_out[PORT_SEL_BITS +: PORT_OFFSETW];
        end else begin : g_no_interleave
            // contiguous: high address bits pick the port, low bits are the offset
            assign req_port_out_sel[i] = mem_req_addr_out[PORT_OFFSETW +: PORT_SEL_BITS];
            assign req_port_out_off[i] = mem_req_addr_out[PORT_OFFSETW-1:0];
        end
    end
end else begin : g_no_port_sel
    // single output port: selector is constant zero and the whole address is the offset
    for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
        assign req_port_out_sel[i] = '0;
        assign req_port_out_off[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
    end
end
// Tag handling logic
wire [NUM_PORTS_IN-1:0] mem_rd_req_tag_ready;
wire [NUM_PORTS_IN-1:0][RD_TAG_WIDTH-1:0] mem_rd_req_tag;
wire [NUM_PORTS_IN-1:0][RD_TAG_WIDTH-1:0] mem_rd_rsp_tag;
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_tag_buf
if (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) begin : g_enabled
wire [TAG_BUFFER_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
wire tbuf_full;
VX_index_buffer #(
.DATAW (TAG_WIDTH_IN),
.SIZE (TAG_BUFFER_SIZE)
) tag_buf (
.clk (clk),
.reset (reset),
.acquire_en (mem_req_valid[i] && ~mem_req_rw[i] && mem_req_ready[i]),
.write_addr (tbuf_waddr),
.write_data (mem_req_tag[i]),
.read_data (mem_rsp_tag[i]),
.read_addr (tbuf_raddr),
.release_en (mem_rsp_valid[i] && mem_rsp_ready[i]),
.full (tbuf_full),
`UNUSED_PIN (empty)
);
assign mem_rd_req_tag_ready[i] = ~tbuf_full;
assign mem_rd_req_tag[i] = tbuf_waddr;
assign tbuf_raddr = mem_rd_rsp_tag[i];
end else begin : g_none
assign mem_rd_req_tag_ready[i] = 1;
assign mem_rd_req_tag[i] = mem_req_tag[i];
assign mem_rsp_tag[i] = mem_rd_rsp_tag[i];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr_out;
end
// AXI write request synchronization
reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready
wire [NUM_PORTS_OUT-1:0] m_axi_awvalid_w, m_axi_wvalid_w;
wire [NUM_PORTS_OUT-1:0] m_axi_awready_w, m_axi_wready_w;
reg [NUM_PORTS_OUT-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_ready
VX_axi_write_ack axi_write_ack (
.clk (clk),
.reset (reset),
.awvalid(m_axi_awvalid[i]),
.awready(m_axi_awready[i]),
.wvalid (m_axi_wvalid[i]),
.wready (m_axi_wready[i]),
.awvalid(m_axi_awvalid_w[i]),
.awready(m_axi_awready_w[i]),
.wvalid (m_axi_wvalid_w[i]),
.wready (m_axi_wready_w[i]),
.aw_ack (m_axi_aw_ack[i]),
.w_ack (m_axi_w_ack[i]),
.tx_rdy (axi_write_ready[i]),
@ -135,84 +188,156 @@ module VX_axi_adapter #(
);
end
wire mem_req_tag_ready;
wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out;
wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out;
// Request ack
// handle tag width mismatch
if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf
localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
wire tbuf_full;
VX_index_buffer #(
.DATAW (TAG_WIDTH_IN),
.SIZE (TAG_BUFFER_SIZE)
) tag_buf (
.clk (clk),
.reset (reset),
.acquire_en (mem_req_valid && ~mem_req_rw && mem_req_ready),
.write_addr (tbuf_waddr),
.write_data (mem_req_tag),
.read_data (mem_rsp_tag),
.read_addr (tbuf_raddr),
.release_en (mem_rsp_valid && mem_rsp_ready),
.full (tbuf_full),
`UNUSED_PIN (empty)
);
assign mem_req_tag_ready = mem_req_rw || ~tbuf_full;
assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr);
assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0];
`UNUSED_VAR (mem_rsp_tag_out)
end else begin : g_no_tag_buf
assign mem_req_tag_ready = 1;
assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag);
assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0];
`UNUSED_VAR (mem_rsp_tag_out)
// Per-output-port arbiter ready signals, indexed [output port][input port];
// each row is driven by the ready_in pins of that output port's request arbiter.
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
// Same signals re-indexed as [input port][output port].
wire [NUM_PORTS_IN-1:0][NUM_PORTS_OUT-1:0] arb_ready_in_w;
// NOTE(review): VX_transpose is defined elsewhere; presumably it swaps the two
// packed dimensions (N rows x M columns -> M x N) — confirm against its source.
VX_transpose #(
.N (NUM_PORTS_OUT),
.M (NUM_PORTS_IN)
) rdy_in_transpose (
.data_in (arb_ready_in),
.data_out (arb_ready_in_w)
);
// An input port's request is accepted when any output-port arbiter signals ready for it.
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_ready_in
assign mem_req_ready[i] = | arb_ready_in_w[i];
end
// request ack
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] :
(m_axi_arready[req_bank_sel] && mem_req_tag_ready);
// AXI request handling
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_awid[i] = mem_req_tag_out;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_req
localparam ARB_TAG_WIDTH = `MAX(RD_TAG_WIDTH, TAG_WIDTH_IN);
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
wire [PORT_OFFSETW-1:0] arb_addr_out, buf_addr_r_out, buf_addr_w_out;
wire [ARB_TAG_WIDTH-1:0] arb_tag_out;
wire [TAG_WIDTH_IN-1:0] buf_tag_w_out;
wire [RD_TAG_WIDTH-1:0] buf_tag_r_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out, buf_sel_out;
wire [DATA_WIDTH-1:0] arb_data_out;
wire [DATA_SIZE-1:0] arb_byteen_out;
wire arb_valid_out, arb_ready_out;
wire arb_rw_out;
wire [NUM_PORTS_IN-1:0][ARB_DATAW-1:0] arb_data_in;
wire [NUM_PORTS_IN-1:0] arb_valid_in;
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_valid_in
wire tag_ready = mem_req_rw[j] || mem_rd_req_tag_ready[j];
assign arb_valid_in[j] = mem_req_valid[j] && tag_ready && (req_port_out_sel[j] == i);
end
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_data_in
wire [ARB_TAG_WIDTH-1:0] tag_value = mem_req_rw[j] ? ARB_TAG_WIDTH'(mem_req_tag[j]) : ARB_TAG_WIDTH'(mem_rd_req_tag[j]);
assign arb_data_in[j] = {mem_req_rw[j], req_port_out_off[j], mem_req_byteen[j], mem_req_data[j], tag_value};
end
VX_stream_arb #(
.NUM_INPUTS (NUM_PORTS_IN),
.NUM_OUTPUTS(1),
.DATAW (ARB_DATAW),
.ARBITER (ARBITER)
) aw_arb (
.clk (clk),
.reset (reset),
.valid_in (arb_valid_in),
.ready_in (arb_ready_in[i]),
.data_in (arb_data_in),
.data_out ({arb_rw_out, arb_addr_out, arb_byteen_out, arb_data_out, arb_tag_out}),
.valid_out (arb_valid_out),
.ready_out (arb_ready_out),
.sel_out (arb_sel_out)
);
wire m_axi_arready_w;
// Acknowledge the arbitrated request only when the channel that matches its
// type can take it: writes wait on the AW/W handshake tracker, reads on the
// AR buffer. OR-ing the two would let a write be popped because the read
// buffer has room (or a read because the write path is ready), dropping it.
assign arb_ready_out = arb_rw_out ? axi_write_ready[i] : m_axi_arready_w;
// AXI write address channel
assign m_axi_awvalid_w[i] = arb_valid_out && arb_rw_out && ~m_axi_aw_ack[i];
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + TAG_WIDTH_IN),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
) aw_buf (
.clk (clk),
.reset (reset),
.valid_in (m_axi_awvalid_w[i]),
.ready_in (m_axi_awready_w[i]),
.data_in ({arb_addr_out, TAG_WIDTH_IN'(arb_tag_out)}),
.data_out ({buf_addr_w_out, buf_tag_w_out}),
.valid_out (m_axi_awvalid[i]),
.ready_out (m_axi_awready[i])
);
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(buf_addr_w_out) << LOG2_DATA_SIZE;
assign m_axi_awid[i] = TAG_WIDTH_OUT'(buf_tag_w_out);
assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(DATA_SIZE);
assign m_axi_awsize[i] = 3'(LOG2_DATA_SIZE);
assign m_axi_awburst[i] = 2'b00;
assign m_axi_awlock[i] = 2'b00;
assign m_axi_awcache[i] = 4'b0000;
assign m_axi_awprot[i] = 3'b000;
assign m_axi_awqos[i] = 4'b0000;
assign m_axi_awregion[i]= 4'b0000;
end
// AXI write request data channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
assign m_axi_wdata[i] = mem_req_data;
assign m_axi_wstrb[i] = mem_req_byteen;
assign m_axi_wlast[i] = 1'b1;
end
// AXI write data channel
// AXI write response channel (ignore)
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_rsp
`UNUSED_VAR (m_axi_bvalid[i])
`UNUSED_VAR (m_axi_bid[i])
`UNUSED_VAR (m_axi_bresp[i])
assign m_axi_bready[i] = 1'b1;
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time))
end
assign m_axi_wvalid_w[i] = arb_valid_out && arb_rw_out && ~m_axi_w_ack[i];
VX_elastic_buffer #(
.DATAW (DATA_SIZE + DATA_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
) w_buf (
.clk (clk),
.reset (reset),
.valid_in (m_axi_wvalid_w[i]),
.ready_in (m_axi_wready_w[i]),
.data_in ({arb_byteen_out, arb_data_out}),
.data_out ({m_axi_wstrb[i], m_axi_wdata[i]}),
.valid_out (m_axi_wvalid[i]),
.ready_out (m_axi_wready[i])
);
assign m_axi_wlast[i] = 1'b1;
// AXI read address channel
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + RD_TAG_WIDTH + NUM_PORTS_IN_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
) ar_buf (
.clk (clk),
.reset (reset),
.valid_in (arb_valid_out && ~arb_rw_out),
.ready_in (m_axi_arready_w),
.data_in ({arb_addr_out, RD_TAG_WIDTH'(arb_tag_out), arb_sel_out}),
.data_out ({buf_addr_r_out, buf_tag_r_out, buf_sel_out}),
.valid_out (m_axi_arvalid[i]),
.ready_out (m_axi_arready[i])
);
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(buf_addr_r_out) << LOG2_DATA_SIZE;
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign m_axi_arid[i] = TAG_WIDTH_OUT'({buf_tag_r_out, buf_sel_out});
end else begin : g_no_input_sel
`UNUSED_VAR (buf_sel_out)
assign m_axi_arid[i] = TAG_WIDTH_OUT'(buf_tag_r_out);
end
// AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && mem_req_tag_ready;
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_arid[i] = mem_req_tag_out;
assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(DATA_SIZE);
assign m_axi_arsize[i] = 3'(LOG2_DATA_SIZE);
assign m_axi_arburst[i] = 2'b00;
assign m_axi_arlock[i] = 2'b00;
assign m_axi_arcache[i] = 4'b0000;
@ -221,36 +346,69 @@ module VX_axi_adapter #(
assign m_axi_arregion[i]= 4'b0000;
end
// AXI write response channel (ignore)
// Write responses are never forwarded to the Vortex side: each output port
// accepts them unconditionally and only checks the response code.
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_rsp
// response signals are not consumed by any logic (presumably these macros silence unused-signal lint — confirm)
`UNUSED_VAR (m_axi_bvalid[i])
`UNUSED_VAR (m_axi_bid[i])
`UNUSED_VAR (m_axi_bresp[i])
// always ready, so the write response channel is never back-pressured
assign m_axi_bready[i] = 1'b1;
// a valid write response must carry bresp == 0
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time))
end
// AXI read response channel
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH_OUT-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_valid_in;
wire [NUM_PORTS_OUT-1:0][DATA_WIDTH+RD_TAG_WIDTH-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rd_rsp_sel_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
assign m_axi_rready[i] = rsp_arb_ready_in[i];
// Unpack each output port's AXI read response into the crossbar inputs.
// m_axi_arid is built as {read tag, input-port index}, so the returned rid is
// split the same way: low NUM_PORTS_IN_BITS bits route the response, the next
// RD_TAG_WIDTH bits are the read tag.
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_rd_rsp_data_in
assign rd_rsp_valid_in[i] = m_axi_rvalid[i];
// payload carried through the crossbar: {read data, read tag}
assign rd_rsp_data_in[i] = {m_axi_rdata[i], m_axi_rid[i][NUM_PORTS_IN_BITS +: RD_TAG_WIDTH]};
if (NUM_PORTS_IN > 1) begin : g_input_sel
// destination input port recovered from the low bits of rid
assign rd_rsp_sel_in[i] = m_axi_rid[i][0 +: NUM_PORTS_IN_BITS];
end else begin : g_no_input_sel
// single input port: nothing to select
assign rd_rsp_sel_in[i] = 0;
end
assign m_axi_rready[i] = rd_rsp_ready_in[i];
// every valid read beat must have rlast set (requests are issued with arlen = 0)
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: *** AXI response error", $time))
// every valid read beat must carry rresp == 0
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time))
end
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH_OUT),
.ARBITER ("R"),
wire [NUM_PORTS_IN-1:0] rd_rsp_valid_out;
wire [NUM_PORTS_IN-1:0][DATA_WIDTH+RD_TAG_WIDTH-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0] rd_rsp_ready_out;
VX_stream_xbar #(
.NUM_INPUTS (NUM_PORTS_OUT),
.NUM_OUTPUTS(NUM_PORTS_IN),
.DATAW (DATA_WIDTH + RD_TAG_WIDTH),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
) rd_rsp_xbar (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.data_out ({mem_rsp_data, mem_rsp_tag_out}),
.valid_out (mem_rsp_valid),
.ready_out (mem_rsp_ready),
.valid_in (rd_rsp_valid_in),
.data_in (rd_rsp_data_in),
.ready_in (rd_rsp_ready_in),
.sel_in (rd_rsp_sel_in),
.data_out (rd_rsp_data_out),
.valid_out (rd_rsp_valid_out),
.ready_out (rd_rsp_ready_out),
`UNUSED_PIN (collisions),
`UNUSED_PIN (sel_out)
);
// Hand each crossbar output back to its Vortex response port.
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rd_rsp_data_out
    assign mem_rsp_valid[i] = rd_rsp_valid_out[i];
    // Unpacking mirrors the {read data, read tag} packing of rd_rsp_data_in.
    // The original NUM_PORTS_IN > 1 conditional had two identical branches,
    // so a single unconditional assignment is equivalent.
    assign {mem_rsp_data[i], mem_rd_rsp_tag[i]} = rd_rsp_data_out[i];
    assign rd_rsp_ready_out[i] = mem_rsp_ready[i];
end
endmodule
`TRACING_ON

View file

@ -66,8 +66,8 @@ module VX_cyclic_arbiter #(
);
VX_demux #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
.DATAW (1),
.N (NUM_REQS)
) grant_decoder (
.sel_in (grant_index),
.data_in (1'b1),

View file

@ -18,26 +18,26 @@
`TRACING_OFF
module VX_demux #(
parameter DATAW = 1,
parameter N = 0,
parameter M = 1,
parameter MODEL = 0,
parameter D = 1 << N
parameter LN = `LOG2UP(N)
) (
input wire [`UP(N)-1:0] sel_in,
input wire [M-1:0] data_in,
output wire [D-1:0][M-1:0] data_out
input wire [LN-1:0] sel_in,
input wire [DATAW-1:0] data_in,
output wire [N-1:0][DATAW-1:0] data_out
);
if (N != 0) begin : g_decoder
logic [D-1:0][M-1:0] shift;
if (N > 1) begin : g_demux
logic [N-1:0][DATAW-1:0] shift;
if (MODEL == 1) begin : g_model1
always @(*) begin
shift = '0;
shift[sel_in] = {M{1'b1}};
shift[sel_in] = {DATAW{1'b1}};
end
end else begin : g_model0
assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M);
assign shift = ((N*DATAW)'({DATAW{1'b1}})) << (sel_in * DATAW);
end
assign data_out = {D{data_in}} & shift;
assign data_out = {N{data_in}} & shift;
end else begin : g_passthru
`UNUSED_VAR (sel_in)
assign data_out = data_in;

View file

@ -101,8 +101,8 @@ module VX_mem_adapter #(
end
VX_demux #(
.N (D),
.M (SRC_DATA_WIDTH/8)
.DATAW (SRC_DATA_WIDTH/8),
.N (P)
) req_be_demux (
.sel_in (req_idx),
.data_in (mem_req_byteen_in),
@ -110,8 +110,8 @@ module VX_mem_adapter #(
);
VX_demux #(
.N (D),
.M (SRC_DATA_WIDTH)
.DATAW (SRC_DATA_WIDTH),
.N (P)
) req_data_demux (
.sel_in (req_idx),
.data_in (mem_req_data_in),

View file

@ -481,8 +481,8 @@ module VX_rr_arbiter #(
end
VX_demux #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
.DATAW (1),
.N (NUM_REQS)
) grant_decoder (
.sel_in (grant_index),
.data_in (grant_valid),

View file

@ -63,16 +63,19 @@ module VX_stream_xbar #(
.data_out (per_output_ready_in_w)
);
for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders
for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_ready_in
assign ready_in[i] = | per_output_ready_in_w[i];
end
for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_demux
VX_demux #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
.DATAW (1),
.N (NUM_OUTPUTS)
) sel_in_demux (
.sel_in (sel_in[i]),
.data_in (valid_in[i]),
.data_out (per_output_valid_in[i])
);
assign ready_in[i] = | per_output_ready_in_w[i];
end
VX_transpose #(
@ -138,8 +141,8 @@ module VX_stream_xbar #(
wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w;
VX_demux #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
.DATAW (1),
.N (NUM_OUTPUTS)
) sel_in_demux (
.sel_in (sel_in[0]),
.data_in (valid_in[0]),