Intel Opae AFU support for multiport
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2024-12-13 21:20:38 -08:00
parent 7975a5a38c
commit 461f2cbbc9
12 changed files with 530 additions and 366 deletions

View file

@ -323,12 +323,12 @@ config2()
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
# test memory ports
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
echo "configuration-2 tests done!"
}

View file

@ -173,7 +173,7 @@
`endif
`ifndef PLATFORM_MEMORY_BANKS
`define PLATFORM_MEMORY_BANKS 1
`define PLATFORM_MEMORY_BANKS 2
`endif
`ifdef XLEN_64
@ -241,7 +241,7 @@
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR `IO_BASE_ADDR
`endif
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`define IO_COUT_SIZE 64
`ifndef IO_MPM_ADDR
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)

View file

@ -191,11 +191,11 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.ADDR_WIDTH_OUT (AXI_ADDR_WIDTH),
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
.NUM_BANKS_IN (`VX_MEM_PORTS),
.NUM_BANKS_OUT (AXI_NUM_BANKS),
.BANK_INTERLEAVE(0),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (AXI_NUM_BANKS),
.INTERLEAVE (0),
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
.RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0)
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),

View file

@ -54,6 +54,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam MEM_PORTS_BITS = `CLOG2(`VX_MEM_PORTS);
localparam MEM_PORTS_WIDTH = `UP(MEM_PORTS_BITS);
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8;
localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr);
@ -61,12 +64,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1);
localparam AVS_RD_QUEUE_SIZE = 32;
localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH;
localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH);
localparam _AVS_REQ_TAGW_VX2 = `MAX(_VX_MEM_TAG_WIDTH, _AVS_REQ_TAGW_VX);
localparam _AVS_REQ_TAGW_CCI = CCI_ADDR_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(CCI_DATA_WIDTH);
localparam _AVS_REQ_TAGW_CCI2 = `MAX(CCI_ADDR_WIDTH, _AVS_REQ_TAGW_CCI);
localparam AVS_REQ_TAGW = `MAX(_AVS_REQ_TAGW_VX2, _AVS_REQ_TAGW_CCI2);
localparam VX_AVS_REQ_TAGW = `VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH);
localparam CCI_AVS_REQ_TAGW = CCI_ADDR_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(CCI_DATA_WIDTH);
localparam VX_AVS_REQ_TAGW2 = `MAX(`VX_MEM_TAG_WIDTH, VX_AVS_REQ_TAGW);
localparam CCI_AVS_REQ_TAGW2 = `MAX(CCI_ADDR_WIDTH, CCI_AVS_REQ_TAGW);
localparam CCI_VX_TAG_WIDTH = `MAX(VX_AVS_REQ_TAGW2, CCI_AVS_REQ_TAGW2);
localparam AVS_TAG_WIDTH = CCI_VX_TAG_WIDTH + 1; // adding the arbiter bit
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RW_PENDING_SIZE= 256;
@ -122,22 +125,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
reg [STATE_WIDTH-1:0] state;
// Vortex ports ///////////////////////////////////////////////////////////////
// Vortex ports ///////////////////////////////////////////////////////////
wire vx_mem_req_valid;
wire vx_mem_req_rw;
wire [`VX_MEM_BYTEEN_WIDTH-1:0] vx_mem_req_byteen;
wire [`VX_MEM_ADDR_WIDTH-1:0] vx_mem_req_addr;
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_req_data;
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_req_tag;
wire vx_mem_req_ready;
wire vx_mem_req_valid [`VX_MEM_PORTS];
wire vx_mem_req_rw [`VX_MEM_PORTS];
wire [`VX_MEM_BYTEEN_WIDTH-1:0] vx_mem_req_byteen [`VX_MEM_PORTS];
wire [`VX_MEM_ADDR_WIDTH-1:0] vx_mem_req_addr [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_req_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_req_tag [`VX_MEM_PORTS];
wire vx_mem_req_ready [`VX_MEM_PORTS];
wire vx_mem_rsp_valid;
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag;
wire vx_mem_rsp_ready;
wire vx_mem_rsp_valid [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_rsp_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag [`VX_MEM_PORTS];
wire vx_mem_rsp_ready [`VX_MEM_PORTS];
// CMD variables //////////////////////////////////////////////////////////////
// CMD variables //////////////////////////////////////////////////////////
reg [2:0][63:0] cmd_args;
@ -150,7 +153,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [`VX_DCR_ADDR_WIDTH-1:0] cmd_dcr_addr = `VX_DCR_ADDR_WIDTH'(cmd_args[0]);
wire [`VX_DCR_DATA_WIDTH-1:0] cmd_dcr_data = `VX_DCR_DATA_WIDTH'(cmd_args[1]);
// MMIO controller ////////////////////////////////////////////////////////////
// MMIO controller ////////////////////////////////////////////////////////
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]);
@ -216,10 +219,31 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
`endif
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
wire cout_q_full, cout_q_empty;
// Console output queue read //////////////////////////////////////////////
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}};
wire [`VX_MEM_PORTS-1:0][COUT_QUEUE_DATAW-1:0] cout_q_dout;
wire [`VX_MEM_PORTS-1:0] cout_q_full, cout_q_empty, cout_q_pop;
reg [MEM_PORTS_WIDTH-1:0] cout_q_id;
// Round-robin selector of the console-output queue that is served by the
// next host read of MMIO_STATUS. The selector advances by one on every
// MMIO_STATUS read, whether or not the selected queue held data.
//
// The index wraps explicitly at `VX_MEM_PORTS-1 instead of relying on
// natural overflow: cout_q_id is MEM_PORTS_WIDTH bits wide, which is
// presumably at least 1 bit even for a single port (via `UP) and is not an
// exact fit when the port count is not a power of two. A free-running
// increment could then select a queue index that does not exist.
//
// NOTE(review): cout_q_id must be driven from this block only. The reset
// branch of the "Handle MMIO read requests" always block also assigns
// cout_q_id; that second assignment should be removed, otherwise the
// register is multiply driven.
always @(posedge clk) begin
    if (reset) begin
        cout_q_id <= '0;
    end else if (cp2af_sRxPort.c0.mmioRdValid && (mmio_req_hdr.address == MMIO_STATUS)) begin
        if (cout_q_id == MEM_PORTS_WIDTH'(`VX_MEM_PORTS - 1)) begin
            // last valid queue reached: restart from queue 0
            cout_q_id <= '0;
        end else begin
            cout_q_id <= cout_q_id + MEM_PORTS_WIDTH'(1);
        end
    end
end
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_cout_q_pop
    // Queue i is popped when the host reads MMIO_STATUS, queue i is the
    // one currently selected by cout_q_id, and it actually holds an entry.
    wire status_rd_valid = cp2af_sRxPort.c0.mmioRdValid && (mmio_req_hdr.address == MMIO_STATUS);
    wire queue_selected  = (cout_q_id == i);
    wire queue_has_data  = ~cout_q_empty[i];
    assign cout_q_pop[i] = status_rd_valid && queue_selected && queue_has_data;
end
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout[cout_q_id] & {COUT_QUEUE_DATAW{!cout_q_empty[cout_q_id]}};
wire cout_q_empty_all = & cout_q_empty;
`ifdef SIMULATION
`ifndef VERILATOR
@ -241,12 +265,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
`endif
`endif
// MMIO controller ////////////////////////////////////////////////////////////
// MMIO controller ////////////////////////////////////////////////////////
// Handle MMIO read requests
always @(posedge clk) begin
if (reset) begin
mmio_rsp.mmioRdValid <= 0;
cout_q_id <= 0;
end else begin
mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
end
@ -271,7 +296,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
16'h0006: mmio_rsp.data <= 64'h0; // next AFU
16'h0008: mmio_rsp.data <= 64'h0; // reserved
MMIO_STATUS: begin
mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)});
mmio_rsp.data <= 64'({cout_q_dout_s, ~cout_q_empty_all, 8'(state)});
`ifdef DBG_TRACE_AFU
if (state != STATE_WIDTH'(mmio_rsp.data)) begin
`TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state))
@ -353,7 +378,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
end
end
// COMMAND FSM ////////////////////////////////////////////////////////////////
// COMMAND FSM ////////////////////////////////////////////////////////////
wire cmd_mem_rd_done;
reg cmd_mem_wr_done;
@ -364,8 +389,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire vx_busy;
wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address);
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ?
CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE);
wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE);
always @(posedge clk) begin
if (reset) begin
@ -463,7 +487,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
end
end
// AVS Controller /////////////////////////////////////////////////////////////
// AVS Controller /////////////////////////////////////////////////////////
wire cci_mem_rd_req_valid;
wire cci_mem_wr_req_valid;
@ -481,13 +505,67 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [CCI_ADDR_WIDTH-1:0] cci_mem_rsp_tag;
wire cci_mem_rsp_ready;
//--
// adjust VX memory interface to be compatible with CCI
VX_mem_bus_if #(
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW)
) cci_vx_mem_bus_if[2]();
.TAG_WIDTH (CCI_VX_TAG_WIDTH)
) vx_mem_bus_if[`VX_MEM_PORTS]();
wire [`VX_MEM_PORTS-1:0] vx_mem_req_valid_qual;
wire [`VX_MEM_PORTS-1:0] vx_mem_req_ready_qual;
// One VX_mem_adapter per Vortex memory port: bridges the per-port Vortex
// request/response signals (vx_mem_req_* / vx_mem_rsp_*) onto vx_mem_bus_if[i].
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_vx_mem_adapter
VX_mem_adapter #(
// source side uses the Vortex memory widths, destination side uses the
// local-memory data width and the shared CCI/VX address and tag widths
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (2)
) vx_mem_adapter (
.clk (clk),
.reset (reset),
// Vortex-facing request: valid/ready go through the *_qual signals rather
// than the raw port handshake (the qualification is assigned elsewhere in
// this module)
.mem_req_valid_in (vx_mem_req_valid_qual[i]),
.mem_req_addr_in (vx_mem_req_addr[i]),
.mem_req_rw_in (vx_mem_req_rw[i]),
.mem_req_byteen_in (vx_mem_req_byteen[i]),
.mem_req_data_in (vx_mem_req_data[i]),
.mem_req_tag_in (vx_mem_req_tag[i]),
.mem_req_ready_in (vx_mem_req_ready_qual[i]),
// Vortex-facing response
.mem_rsp_valid_in (vx_mem_rsp_valid[i]),
.mem_rsp_data_in (vx_mem_rsp_data[i]),
.mem_rsp_tag_in (vx_mem_rsp_tag[i]),
.mem_rsp_ready_in (vx_mem_rsp_ready[i]),
// adapted request on the per-port bus interface
.mem_req_valid_out (vx_mem_bus_if[i].req_valid),
.mem_req_addr_out (vx_mem_bus_if[i].req_data.addr),
.mem_req_rw_out (vx_mem_bus_if[i].req_data.rw),
.mem_req_byteen_out (vx_mem_bus_if[i].req_data.byteen),
.mem_req_data_out (vx_mem_bus_if[i].req_data.data),
.mem_req_tag_out (vx_mem_bus_if[i].req_data.tag),
.mem_req_ready_out (vx_mem_bus_if[i].req_ready),
// adapted response on the per-port bus interface
.mem_rsp_valid_out (vx_mem_bus_if[i].rsp_valid),
.mem_rsp_data_out (vx_mem_bus_if[i].rsp_data.data),
.mem_rsp_tag_out (vx_mem_bus_if[i].rsp_data.tag),
.mem_rsp_ready_out (vx_mem_bus_if[i].rsp_ready)
);
// no adapter port is connected to the bus flags field, so tie it to zero
assign vx_mem_bus_if[i].req_data.flags = '0;
end
// adjust CCI memory interface to be compatible with VX
VX_mem_bus_if #(
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.TAG_WIDTH (CCI_VX_TAG_WIDTH)
) cci_vx_mem_arb_in_if[2]();
VX_mem_adapter #(
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
@ -495,7 +573,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
.DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.SRC_TAG_WIDTH (CCI_ADDR_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW),
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) cci_mem_adapter (
@ -515,125 +593,122 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.mem_rsp_tag_in (cci_mem_rsp_tag),
.mem_rsp_ready_in (cci_mem_rsp_ready),
.mem_req_valid_out (cci_vx_mem_bus_if[1].req_valid),
.mem_req_addr_out (cci_vx_mem_bus_if[1].req_data.addr),
.mem_req_rw_out (cci_vx_mem_bus_if[1].req_data.rw),
.mem_req_byteen_out (cci_vx_mem_bus_if[1].req_data.byteen),
.mem_req_data_out (cci_vx_mem_bus_if[1].req_data.data),
.mem_req_tag_out (cci_vx_mem_bus_if[1].req_data.tag),
.mem_req_ready_out (cci_vx_mem_bus_if[1].req_ready),
.mem_req_valid_out (cci_vx_mem_arb_in_if[1].req_valid),
.mem_req_addr_out (cci_vx_mem_arb_in_if[1].req_data.addr),
.mem_req_rw_out (cci_vx_mem_arb_in_if[1].req_data.rw),
.mem_req_byteen_out (cci_vx_mem_arb_in_if[1].req_data.byteen),
.mem_req_data_out (cci_vx_mem_arb_in_if[1].req_data.data),
.mem_req_tag_out (cci_vx_mem_arb_in_if[1].req_data.tag),
.mem_req_ready_out (cci_vx_mem_arb_in_if[1].req_ready),
.mem_rsp_valid_out (cci_vx_mem_bus_if[1].rsp_valid),
.mem_rsp_data_out (cci_vx_mem_bus_if[1].rsp_data.data),
.mem_rsp_tag_out (cci_vx_mem_bus_if[1].rsp_data.tag),
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
.mem_rsp_valid_out (cci_vx_mem_arb_in_if[1].rsp_valid),
.mem_rsp_data_out (cci_vx_mem_arb_in_if[1].rsp_data.data),
.mem_rsp_tag_out (cci_vx_mem_arb_in_if[1].rsp_data.tag),
.mem_rsp_ready_out (cci_vx_mem_arb_in_if[1].rsp_ready)
);
assign cci_vx_mem_arb_in_if[1].req_data.flags = '0;
assign cci_vx_mem_bus_if[1].req_data.flags = '0;
// arbitrate between CCI and VX memory interfaces
//--
`ASSIGN_VX_MEM_BUS_IF(cci_vx_mem_arb_in_if[0], vx_mem_bus_if[0]);
wire vx_mem_is_cout;
wire vx_mem_req_valid_qual;
wire vx_mem_req_ready_qual;
assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout;
VX_mem_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (2)
) vx_mem_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (vx_mem_req_valid_qual),
.mem_req_addr_in (vx_mem_req_addr),
.mem_req_rw_in (vx_mem_req_rw),
.mem_req_byteen_in (vx_mem_req_byteen),
.mem_req_data_in (vx_mem_req_data),
.mem_req_tag_in (vx_mem_req_tag),
.mem_req_ready_in (vx_mem_req_ready_qual),
.mem_rsp_valid_in (vx_mem_rsp_valid),
.mem_rsp_data_in (vx_mem_rsp_data),
.mem_rsp_tag_in (vx_mem_rsp_tag),
.mem_rsp_ready_in (vx_mem_rsp_ready),
.mem_req_valid_out (cci_vx_mem_bus_if[0].req_valid),
.mem_req_addr_out (cci_vx_mem_bus_if[0].req_data.addr),
.mem_req_rw_out (cci_vx_mem_bus_if[0].req_data.rw),
.mem_req_byteen_out (cci_vx_mem_bus_if[0].req_data.byteen),
.mem_req_data_out (cci_vx_mem_bus_if[0].req_data.data),
.mem_req_tag_out (cci_vx_mem_bus_if[0].req_data.tag),
.mem_req_ready_out (cci_vx_mem_bus_if[0].req_ready),
.mem_rsp_valid_out (cci_vx_mem_bus_if[0].rsp_valid),
.mem_rsp_data_out (cci_vx_mem_bus_if[0].rsp_data.data),
.mem_rsp_tag_out (cci_vx_mem_bus_if[0].rsp_data.tag),
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
);
assign cci_vx_mem_bus_if[0].req_data.flags = '0;
//--
VX_mem_bus_if #(
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW+1)
) mem_bus_if[1]();
.TAG_WIDTH (AVS_TAG_WIDTH)
) cci_vx_mem_arb_out_if[1]();
VX_mem_arb #(
.NUM_INPUTS (2),
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (CCI_VX_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW),
.TAG_WIDTH (CCI_VX_TAG_WIDTH),
.ARBITER ("P"), // prioritize VX requests
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_arb (
.clk (clk),
.reset (reset),
.bus_in_if (cci_vx_mem_bus_if),
.bus_out_if (mem_bus_if)
.bus_in_if (cci_vx_mem_arb_in_if),
.bus_out_if (cci_vx_mem_arb_out_if)
);
//--
// final merged memory interface
wire mem_req_valid [`VX_MEM_PORTS];
wire mem_req_rw [`VX_MEM_PORTS];
wire [CCI_VX_ADDR_WIDTH-1:0] mem_req_addr [`VX_MEM_PORTS];
wire [LMEM_DATA_SIZE-1:0] mem_req_byteen [`VX_MEM_PORTS];
wire [LMEM_DATA_WIDTH-1:0] mem_req_data [`VX_MEM_PORTS];
wire [AVS_TAG_WIDTH-1:0] mem_req_tag [`VX_MEM_PORTS];
wire mem_req_ready [`VX_MEM_PORTS];
wire mem_rsp_valid [`VX_MEM_PORTS];
wire [LMEM_DATA_WIDTH-1:0] mem_rsp_data [`VX_MEM_PORTS];
wire [AVS_TAG_WIDTH-1:0] mem_rsp_tag [`VX_MEM_PORTS];
wire mem_rsp_ready [`VX_MEM_PORTS];
// assign port0 to CCI/VX arbiter
assign mem_req_valid[0] = cci_vx_mem_arb_out_if[0].req_valid;
assign mem_req_rw[0] = cci_vx_mem_arb_out_if[0].req_data.rw;
assign mem_req_addr[0] = cci_vx_mem_arb_out_if[0].req_data.addr;
assign mem_req_byteen[0]= cci_vx_mem_arb_out_if[0].req_data.byteen;
assign mem_req_data[0] = cci_vx_mem_arb_out_if[0].req_data.data;
assign mem_req_tag[0] = cci_vx_mem_arb_out_if[0].req_data.tag;
assign cci_vx_mem_arb_out_if[0].req_ready = mem_req_ready[0];
assign cci_vx_mem_arb_out_if[0].rsp_valid = mem_rsp_valid[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.data = mem_rsp_data[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.tag = mem_rsp_tag[0];
assign mem_rsp_ready[0] = cci_vx_mem_arb_out_if[0].rsp_ready;
`UNUSED_VAR (cci_vx_mem_arb_out_if[0].req_data.flags)
// assign other ports to VX memory bus
for (genvar p = 1; p < `VX_MEM_PORTS; ++p) begin : g_mem_bus_if
    // Ports 1..N-1 bypass the CCI/VX arbiter and are wired straight from
    // their Vortex bus interface into the merged memory-port arrays.

    // response path: merged arrays -> Vortex bus interface
    // (the response tag is cast back down to the CCI_VX_TAG_WIDTH bus tag)
    assign vx_mem_bus_if[p].rsp_valid     = mem_rsp_valid[p];
    assign vx_mem_bus_if[p].rsp_data.data = mem_rsp_data[p];
    assign vx_mem_bus_if[p].rsp_data.tag  = CCI_VX_TAG_WIDTH'(mem_rsp_tag[p]);
    assign mem_rsp_ready[p]               = vx_mem_bus_if[p].rsp_ready;

    // request path: Vortex bus interface -> merged arrays
    // (the request tag is cast up to the AVS_TAG_WIDTH used by the merged arrays)
    assign mem_req_valid[p]           = vx_mem_bus_if[p].req_valid;
    assign mem_req_rw[p]              = vx_mem_bus_if[p].req_data.rw;
    assign mem_req_addr[p]            = vx_mem_bus_if[p].req_data.addr;
    assign mem_req_byteen[p]          = vx_mem_bus_if[p].req_data.byteen;
    assign mem_req_data[p]            = vx_mem_bus_if[p].req_data.data;
    assign mem_req_tag[p]             = AVS_TAG_WIDTH'(vx_mem_bus_if[p].req_data.tag);
    assign vx_mem_bus_if[p].req_ready = mem_req_ready[p];
end
// convert merged memory interface to AVS
VX_avs_adapter #(
.DATA_WIDTH (LMEM_DATA_WIDTH),
.ADDR_WIDTH_IN (CCI_VX_ADDR_WIDTH),
.ADDR_WIDTH_OUT(LMEM_ADDR_WIDTH),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_REQ_TAGW + 1),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_TAG_WIDTH),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE),
.REQ_OUT_BUF (2),
.RSP_OUT_BUF (0)
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
.REQ_OUT_BUF (2), // always needed due to CCI/VX arbiter
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || NUM_LOCAL_MEM_BANKS > 1) ? 2 : 0)
) avs_adapter (
.clk (clk),
.reset (reset),
// Memory request
.mem_req_valid (mem_bus_if[0].req_valid),
.mem_req_rw (mem_bus_if[0].req_data.rw),
.mem_req_byteen (mem_bus_if[0].req_data.byteen),
.mem_req_addr (mem_bus_if[0].req_data.addr),
.mem_req_data (mem_bus_if[0].req_data.data),
.mem_req_tag (mem_bus_if[0].req_data.tag),
.mem_req_ready (mem_bus_if[0].req_ready),
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// Memory response
.mem_rsp_valid (mem_bus_if[0].rsp_valid),
.mem_rsp_data (mem_bus_if[0].rsp_data.data),
.mem_rsp_tag (mem_bus_if[0].rsp_data.tag),
.mem_rsp_ready (mem_bus_if[0].rsp_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
// AVS bus
.avs_writedata (avs_writedata),
@ -647,9 +722,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.avs_readdatavalid(avs_readdatavalid)
);
`UNUSED_VAR (mem_bus_if[0].req_data.flags)
// CCI-P Read Request ///////////////////////////////////////////////////////////
// CCI-P Read Request /////////////////////////////////////////////////////
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr;
@ -818,7 +891,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
end
)
// CCI-P Write Request //////////////////////////////////////////////////////////
// CCI-P Write Request ////////////////////////////////////////////////////
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_addr;
@ -865,14 +938,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
`UNUSED_VAR (cci_pending_writes)
assign cci_mem_rd_req_valid = (STATE_MEM_READ == state)
&& ~cci_mem_rd_req_done;
assign cci_mem_rd_req_valid = (STATE_MEM_READ == state) && ~cci_mem_rd_req_done;
assign cci_mem_rsp_ready = ~cp2af_sRxPort.c1TxAlmFull
&& ~cci_pending_writes_full;
assign cci_mem_rsp_ready = ~cp2af_sRxPort.c1TxAlmFull && ~cci_pending_writes_full;
assign cmd_mem_rd_done = cci_wr_req_done
&& cci_pending_writes_empty;
assign cmd_mem_rd_done = cci_wr_req_done && cci_pending_writes_empty;
// Send write requests to CCI
always @(posedge clk) begin
@ -931,11 +1001,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
assign cci_mem_req_data = cci_rdq_dout[CCI_RD_QUEUE_DATAW-1:CCI_ADDR_WIDTH];
assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr;
// Vortex ///////////////////////////////////////////////////////////////////
// Vortex /////////////////////////////////////////////////////////////////
wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state);
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr;
wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data;
`SCOPE_IO_SWITCH (2);
@ -969,52 +1039,52 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.busy (vx_busy)
);
// COUT HANDLING //////////////////////////////////////////////////////////////
// COUT HANDLING //////////////////////////////////////////////////////////
wire [COUT_TID_WIDTH-1:0] cout_tid;
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_cout
VX_onehot_encoder #(
.N (`VX_MEM_BYTEEN_WIDTH)
) cout_tid_enc (
.data_in (vx_mem_req_byteen),
.data_out (cout_tid),
`UNUSED_PIN (valid_out)
);
wire [COUT_TID_WIDTH-1:0] cout_tid;
wire [`VX_MEM_ADDR_WIDTH-1:0] io_cout_addr_b = `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> `CLOG2(`MEM_BLOCK_SIZE));
VX_onehot_encoder #(
.N (`VX_MEM_BYTEEN_WIDTH)
) cout_tid_enc (
.data_in (vx_mem_req_byteen[i]),
.data_out (cout_tid),
`UNUSED_PIN (valid_out)
);
assign vx_mem_is_cout = (vx_mem_req_addr == io_cout_addr_b);
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_m = vx_mem_req_data[i];
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
wire [7:0] cout_char = vx_mem_req_data_m[cout_tid];
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_m = vx_mem_req_data;
wire [`VX_MEM_ADDR_WIDTH-1:0] io_cout_addr_b = `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> `CLOG2(`MEM_BLOCK_SIZE));
wire [7:0] cout_char = vx_mem_req_data_m[cout_tid];
wire vx_mem_is_cout = (vx_mem_req_addr[i] == io_cout_addr_b);
wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full;
assign vx_mem_req_valid_qual[i] = vx_mem_req_valid[i] && ~vx_mem_is_cout;
assign vx_mem_req_ready[i] = vx_mem_is_cout ? ~cout_q_full[i] : vx_mem_req_ready_qual[i];
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
&& (mmio_req_hdr.address == MMIO_STATUS)
&& ~cout_q_empty;
wire cout_q_push = vx_mem_req_valid[i] && vx_mem_is_cout && ~cout_q_full[i];
VX_fifo_queue #(
.DATAW (COUT_QUEUE_DATAW),
.DEPTH (COUT_QUEUE_SIZE)
) cout_queue (
.clk (clk),
.reset (reset),
.push (cout_q_push),
.pop (cout_q_pop),
.data_in ({cout_tid, cout_char}),
.data_out (cout_q_dout),
.empty (cout_q_empty),
.full (cout_q_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
VX_fifo_queue #(
.DATAW (COUT_QUEUE_DATAW),
.DEPTH (COUT_QUEUE_SIZE)
) cout_queue (
.clk (clk),
.reset (reset),
.push (cout_q_push),
.pop (cout_q_pop[i]),
.data_in ({cout_tid, cout_char}),
.data_out (cout_q_dout[i]),
.empty (cout_q_empty[i]),
.full (cout_q_full[i]),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
// SCOPE //////////////////////////////////////////////////////////////////////
// SCOPE //////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU
reg [STATE_WIDTH-1:0] state_prev;
@ -1022,18 +1092,18 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
state_prev <= state;
end
wire state_changed = (state != state_prev);
wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready;
wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready;
wire vx_mem_req_fire = vx_mem_req_valid[0] && vx_mem_req_ready[0];
wire vx_mem_rsp_fire = vx_mem_rsp_valid[0] && vx_mem_rsp_ready[0];
wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0];
`NEG_EDGE (reset_negedge, reset);
`SCOPE_TAP (0, 0, {
vx_reset,
vx_busy,
vx_mem_req_valid,
vx_mem_req_ready,
vx_mem_rsp_valid,
vx_mem_rsp_ready,
vx_mem_req_valid[0],
vx_mem_req_ready[0],
vx_mem_rsp_valid[0],
vx_mem_rsp_ready[0],
avs_read[0],
avs_write[0],
avs_waitrequest[0],
@ -1060,13 +1130,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
},{
cmd_type,
state,
vx_mem_req_rw,
vx_mem_req_byteen,
vx_mem_req_addr,
vx_mem_req_data,
vx_mem_req_tag,
vx_mem_rsp_data,
vx_mem_rsp_tag,
vx_mem_req_rw[0],
vx_mem_req_byteen[0],
vx_mem_req_addr[0],
vx_mem_req_data[0],
vx_mem_req_tag[0],
vx_mem_rsp_data[0],
vx_mem_rsp_tag[0],
vx_dcr_wr_addr,
vx_dcr_wr_data,
mmio_req_hdr.address,
@ -1089,19 +1159,19 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
`SCOPE_IO_UNUSED(0)
`endif
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
`ifdef DBG_TRACE_AFU
always @(posedge clk) begin
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
if (avs_write[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]))
`TRACE(2, ("%t: AVS Wr Req[%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]))
end
if (avs_read[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]))
`TRACE(2, ("%t: AVS Rd Req[%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]))
end
if (avs_readdatavalid[i]) begin
`TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i]))
`TRACE(2, ("%t: AVS Rd Rsp[%0d]: data=0x%h\n", $time, i, avs_readdata[i]))
end
end
end

View file

@ -595,7 +595,7 @@ module VX_cache_bank #(
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| evict_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_BANK_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
// issue a fill request on a read/write miss
// issue a writeback on a dirty line eviction
@ -691,6 +691,14 @@ module VX_cache_bank #(
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
wire [`XLEN-1:0] mem_rsp_full_addr = `CS_BANK_TO_FULL_ADDR(mem_rsp_addr, BANK_ID);
wire [`XLEN-1:0] replay_full_addr = `CS_BANK_TO_FULL_ADDR(replay_addr, BANK_ID);
wire [`XLEN-1:0] core_req_full_addr = `CS_BANK_TO_FULL_ADDR(core_req_addr, BANK_ID);
wire [`XLEN-1:0] full_addr_st0 = `CS_BANK_TO_FULL_ADDR(addr_st0, BANK_ID);
wire [`XLEN-1:0] full_addr_st1 = `CS_BANK_TO_FULL_ADDR(addr_st1, BANK_ID);
wire [`XLEN-1:0] mreq_queue_full_addr = `CS_BANK_TO_FULL_ADDR(mreq_queue_addr, BANK_ID);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID,
@ -698,71 +706,71 @@ module VX_cache_bank #(
end
if (mem_rsp_fire) begin
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
mem_rsp_full_addr, mem_rsp_id, mem_rsp_data, req_uuid_sel))
end
if (replay_fire) begin
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
replay_full_addr, replay_tag, replay_idx, req_uuid_sel))
end
if (core_req_fire) begin
if (core_req_rw) begin
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
core_req_full_addr, core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
end else begin
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
core_req_full_addr, core_req_tag, core_req_idx, req_uuid_sel))
end
end
if (do_init_st0) begin
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, full_addr_st0, line_idx_st0))
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
full_addr_st0, evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
full_addr_st0, evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
end
if (do_lookup_st0 && ~pipe_stall) begin
if (is_hit_st0) begin
`TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
full_addr_st0, rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
end else begin
`TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
full_addr_st0, rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
end
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, data_st0, req_uuid_st0))
full_addr_st0, way_idx_st0, line_idx_st0, data_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, req_uuid_st0))
full_addr_st0, way_idx_st0, line_idx_st0, req_uuid_st0))
end
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
full_addr_st1, way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
end
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
full_addr_st1, way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
end
if (crsp_queue_fire) begin
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
full_addr_st1, crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
end
if (mreq_queue_push) begin
if (!WRITEBACK && do_write_st1) begin
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
mreq_queue_full_addr, mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else if (WRITEBACK && do_writeback_st1) begin
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
mreq_queue_full_addr, mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else begin
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
mreq_queue_full_addr, mshr_id_st1, req_uuid_st1))
end
end
end

View file

@ -189,7 +189,7 @@ module VX_cache_bypass #(
VX_bits_insert #(
.N (MEM_TAG_NC1_WIDTH),
.S (WSEL_BITS),
.POS (MEM_TAG_ID_WIDTH)
.POS (TAG_SEL_IDX)
) wsel_insert (
.data_in (core_req_nc_arb_tag),
.ins_in (req_wsel),
@ -198,7 +198,7 @@ module VX_cache_bypass #(
VX_bits_remove #(
.N (MEM_TAG_NC2_WIDTH),
.S (WSEL_BITS),
.POS (MEM_TAG_ID_WIDTH)
.POS (TAG_SEL_IDX)
) wsel_remove (
.data_in (mem_bus_out_nc_if[i].rsp_data.tag),
.sel_out (rsp_wsel),

View file

@ -55,7 +55,7 @@
///////////////////////////////////////////////////////////////////////////////
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
// Expands a bank-local address x into an XLEN-bit address for display.
//   x : bank-local address; its width is taken from $bits(x)
//   b : bank index
// The result is {x, pad}, where pad is (XLEN - $bits(x)) bits wide and holds b
// shifted left so that it occupies the top CS_BANK_SEL_BITS bits of the pad.
// The argument b is parenthesized so that a compound expression passed by a
// caller (for example a conditional) is shifted as a whole.
`define CS_BANK_TO_FULL_ADDR(x, b) {x, (`XLEN-$bits(x))'((b) << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////

View file

@ -210,13 +210,13 @@ module VX_cache_mshr #(
end
`RUNTIME_ASSERT(~(allocate_fire && valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
`CS_BANK_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
`RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`CS_BANK_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT(~(fill_valid && ~valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
`CS_BANK_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (DATA_WIDTH),
@ -262,7 +262,7 @@ module VX_cache_mshr #(
end
if (allocate_fire) begin
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
`CS_BANK_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
end
if (finalize_valid && finalize_is_release) begin
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
@ -272,17 +272,17 @@ module VX_cache_mshr #(
end
if (fill_valid) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
`CS_BANK_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
end
if (dequeue_fire) begin
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid))
`CS_BANK_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid))
end
if (show_table) begin
`TRACE(3, ("%t: %s table", $time, INSTANCE_ID))
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)))
`TRACE(3, (" %0d=0x%0h", i, `CS_BANK_TO_FULL_ADDR(addr_table[i], BANK_ID)))
if (write_table[i]) begin
`TRACE(3, ("(w)"))
end else begin

View file

@ -234,13 +234,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (core_bus_if[i].req_valid && core_bus_if[i].req_ready) begin
if (core_bus_if[i].req_data.rw) begin
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag.value, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_bus_if[i].req_data.tag.uuid))
`TRACE(2, ("%t: %s core-wr-req[%0d]: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag.value, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_bus_if[i].req_data.tag.uuid))
end else begin
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag.value, i, core_bus_if[i].req_data.tag.uuid))
`TRACE(2, ("%t: %s core-rd-req[%0d]: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, i, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag.value, i, core_bus_if[i].req_data.tag.uuid))
end
end
if (core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready) begin
`TRACE(2, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag.value, i, core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag.uuid))
`TRACE(2, ("%t: %s core-rd-rsp[%0d]: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, core_bus_if[i].rsp_data.tag.value, i, core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag.uuid))
end
end
end
@ -249,16 +249,16 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
if (mem_bus_if[i].req_data.rw) begin
`TRACE(2, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag.value, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag.uuid))
`TRACE(2, ("%t: %s mem-wr-req[%0d]: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag.value, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag.uuid))
end else begin
`TRACE(2, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag.value, mem_bus_if[i].req_data.tag.uuid))
`TRACE(2, ("%t: %s mem-rd-req[%0d]: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag.value, mem_bus_if[i].req_data.tag.uuid))
end
end
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
`TRACE(2, ("%t: %s mem-rd-rsp: data=0x%h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if[i].rsp_data.data[i], mem_bus_if[i].rsp_data.tag.value, mem_bus_if[i].rsp_data.tag.uuid))
`TRACE(2, ("%t: %s mem-rd-rsp[%0d]: data=0x%h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.data, mem_bus_if[i].rsp_data.tag.value, mem_bus_if[i].rsp_data.tag.uuid))
end
end
end

View file

@ -19,10 +19,12 @@ module VX_avs_adapter #(
parameter ADDR_WIDTH_IN = 1,
parameter ADDR_WIDTH_OUT= 32,
parameter BURST_WIDTH = 1,
parameter NUM_BANKS = 1,
parameter NUM_PORTS_IN = 1,
parameter NUM_PORTS_OUT = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter BANK_INTERLEAVE= 0,
parameter INTERLEAVE = 0,
parameter ARBITER = "R",
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
) (
@ -30,152 +32,224 @@ module VX_avs_adapter #(
input wire reset,
// Memory request
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
input wire mem_req_valid [NUM_PORTS_IN],
input wire mem_req_rw [NUM_PORTS_IN],
input wire [DATA_WIDTH/8-1:0] mem_req_byteen [NUM_PORTS_IN],
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr [NUM_PORTS_IN],
input wire [DATA_WIDTH-1:0] mem_req_data [NUM_PORTS_IN],
input wire [TAG_WIDTH-1:0] mem_req_tag [NUM_PORTS_IN],
output wire mem_req_ready [NUM_PORTS_IN],
// Memory response
output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready,
output wire mem_rsp_valid [NUM_PORTS_IN],
output wire [DATA_WIDTH-1:0] mem_rsp_data [NUM_PORTS_IN],
output wire [TAG_WIDTH-1:0] mem_rsp_tag [NUM_PORTS_IN],
input wire mem_rsp_ready [NUM_PORTS_IN],
// AVS bus
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
output wire [DATA_WIDTH/8-1:0] avs_byteenable [NUM_BANKS],
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
input wire avs_readdatavalid [NUM_BANKS]
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_PORTS_OUT],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_PORTS_OUT],
input wire avs_waitrequest [NUM_PORTS_OUT],
output wire avs_write [NUM_PORTS_OUT],
output wire avs_read [NUM_PORTS_OUT],
output wire [DATA_WIDTH/8-1:0] avs_byteenable [NUM_PORTS_OUT],
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_PORTS_OUT],
input wire avs_readdatavalid [NUM_PORTS_OUT]
);
localparam DATA_SIZE = DATA_WIDTH/8;
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
// Derived widths for the multi-port AVS adapter.
// number of address bits used to choose one of the NUM_PORTS_OUT output ports
localparam PORT_SEL_BITS = `CLOG2(NUM_PORTS_OUT);
// declared width of the port-select signals
// NOTE(review): UP() presumably clamps a zero width to 1 - confirm against its definition
localparam PORT_SEL_WIDTH = `UP(PORT_SEL_BITS);
// address width covering all output ports: per-port address plus port-select bits
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + PORT_SEL_BITS; // to input space
// width of the address offset within one output port (works out to ADDR_WIDTH_OUT)
localparam PORT_OFFSETW = DST_ADDR_WDITH - PORT_SEL_BITS;
// number of bits needed to identify the originating input port
localparam NUM_PORTS_IN_BITS = `CLOG2(NUM_PORTS_IN);
// declared width of the input-port select signals (same UP() caveat as above)
localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
// entry width of the pending read-request queue: request tag plus input-port index
localparam REQ_QUEUE_DATAW = TAG_WIDTH + NUM_PORTS_IN_BITS;
// payload width through the request arbiter: rw bit, port offset, data, byte enables, tag
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_WIDTH + DATA_SIZE + TAG_WIDTH;
// payload width of a read response: data plus tag
localparam RSP_DATAW = DATA_WIDTH + TAG_WIDTH;
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
// Requests handling //////////////////////////////////////////////////////
// Ports selection
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0] bank_req_ready;
wire [NUM_PORTS_IN-1:0][PORT_SEL_WIDTH-1:0] req_port_out_sel;
wire [NUM_PORTS_IN-1:0][PORT_OFFSETW-1:0] req_port_out_off;
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
if (NUM_PORTS_OUT > 1) begin : g_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr[i]);
if (INTERLEAVE) begin : g_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_SEL_BITS-1:0];
assign req_port_out_off[i] = mem_req_addr_out[PORT_SEL_BITS +: PORT_OFFSETW];
end else begin : g_no_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_OFFSETW +: PORT_SEL_BITS];
assign req_port_out_off[i] = mem_req_addr_out[PORT_OFFSETW-1:0];
end
end
end else begin : g_no_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
assign req_port_out_sel[i] = '0;
assign req_port_out_off[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr_out;
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
// Request ack
// Ready bits reported by the per-output-port request arbiters:
// arb_ready_in[o] is the ready_in vector of output port o's arbiter, one bit per input port.
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
// Same bits regrouped per input port: arb_ready_in_w[p] holds one bit per output port.
wire [NUM_PORTS_IN-1:0][NUM_PORTS_OUT-1:0] arb_ready_in_w;
// NOTE(review): VX_transpose is assumed to swap the two dimensions (N x M -> M x N);
// its implementation is not visible here - confirm.
VX_transpose #(
.N (NUM_PORTS_OUT),
.M (NUM_PORTS_IN)
) rdy_in_transpose (
.data_in (arb_ready_in),
.data_out (arb_ready_in_w)
);
// An input port's request is acknowledged when any output-port arbiter reports
// ready for it (OR-reduction over that input port's ready bits).
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_ready_in
assign mem_req_ready[i] = | arb_ready_in_w[i];
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_pending_sizes
// Request handling ///////////////////////////////////////////////////////
wire [NUM_PORTS_OUT-1:0][REQ_QUEUE_DATAW-1:0] rd_req_queue_data_out;
wire [NUM_PORTS_OUT-1:0] rd_req_queue_pop;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_requests
wire [PORT_OFFSETW-1:0] arb_addr_out;
wire [TAG_WIDTH-1:0] arb_tag_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out;
wire [DATA_WIDTH-1:0] arb_data_out;
wire [DATA_SIZE-1:0] arb_byteen_out;
wire arb_valid_out, arb_ready_out;
wire arb_rw_out;
wire [NUM_PORTS_IN-1:0][ARB_DATAW-1:0] arb_data_in;
wire [NUM_PORTS_IN-1:0] arb_valid_in;
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_valid_in
assign arb_valid_in[j] = mem_req_valid[j] && (req_port_out_sel[j] == i);
end
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_data_in
assign arb_data_in[j] = {mem_req_rw[j], req_port_out_off[j], mem_req_byteen[j], mem_req_data[j], mem_req_tag[j]};
end
VX_stream_arb #(
.NUM_INPUTS (NUM_PORTS_IN),
.NUM_OUTPUTS(1),
.DATAW (ARB_DATAW),
.ARBITER (ARBITER)
) req_arb (
.clk (clk),
.reset (reset),
.valid_in (arb_valid_in),
.ready_in (arb_ready_in[i]),
.data_in (arb_data_in),
.data_out ({arb_rw_out, arb_addr_out, arb_byteen_out, arb_data_out, arb_tag_out}),
.valid_out (arb_valid_out),
.ready_out (arb_ready_out),
.sel_out (arb_sel_out)
);
wire rd_req_queue_going_full;
wire rd_req_queue_push;
assign rd_req_queue_push = arb_valid_out && arb_ready_out && ~arb_rw_out;
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.incr (req_queue_push[i]),
.decr (req_queue_pop[i]),
.incr (rd_req_queue_push),
.decr (rd_req_queue_pop[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (alm_empty),
.full (req_queue_going_full[i]),
.full (rd_req_queue_going_full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_req_queues
wire [REQ_QUEUE_DATAW-1:0] rd_req_queue_data_in;
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign rd_req_queue_data_in = {arb_tag_out, arb_sel_out};
end else begin : g_no_input_sel
`UNUSED_VAR (arb_sel_out)
assign rd_req_queue_data_in = arb_tag_out;
end
VX_fifo_queue #(
.DATAW (TAG_WIDTH),
.DATAW (REQ_QUEUE_DATAW),
.DEPTH (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (req_queue_push[i]),
.pop (req_queue_pop[i]),
.data_in (mem_req_tag),
.data_out (req_queue_tag_out[i]),
.push (rd_req_queue_push),
.pop (rd_req_queue_pop[i]),
.data_in (rd_req_queue_data_in),
.data_out (rd_req_queue_data_out[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_out_bufs
wire valid_out;
wire rw_out;
wire [DATA_SIZE-1:0] byteen_out;
wire [BANK_OFFSETW-1:0] addr_out;
wire [DATA_WIDTH-1:0] data_out;
wire ready_out;
wire buf_valid_out;
wire buf_rw_out;
wire [DATA_SIZE-1:0] buf_byteen_out;
wire [PORT_OFFSETW-1:0] buf_addr_out;
wire [DATA_WIDTH-1:0] buf_data_out;
wire buf_ready_out;
wire valid_out_w = mem_req_valid && ~req_queue_going_full[i] && (req_bank_sel == i);
wire ready_out_w;
// stall pipeline if the request queue is needed and going full
wire arb_valid_out_w, arb_ready_out_w;
wire rd_req_queue_ready = arb_rw_out || ~rd_req_queue_going_full;
assign arb_valid_out_w = arb_valid_out && rd_req_queue_ready;
assign arb_ready_out = arb_ready_out_w && rd_req_queue_ready;
VX_elastic_buffer #(
.DATAW (1 + DATA_SIZE + BANK_OFFSETW + DATA_WIDTH),
.DATAW (1 + DATA_SIZE + PORT_OFFSETW + DATA_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
) req_out_buf (
) req_buf (
.clk (clk),
.reset (reset),
.valid_in (valid_out_w),
.ready_in (ready_out_w),
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
.data_out ({rw_out, byteen_out, addr_out, data_out}),
.valid_out (valid_out),
.ready_out (ready_out)
.valid_in (arb_valid_out_w),
.ready_in (arb_ready_out_w),
.data_in ({arb_rw_out, arb_byteen_out, arb_addr_out, arb_data_out}),
.data_out ({buf_rw_out, buf_byteen_out, buf_addr_out, buf_data_out}),
.valid_out (buf_valid_out),
.ready_out (buf_ready_out)
);
assign avs_read[i] = valid_out && ~rw_out;
assign avs_write[i] = valid_out && rw_out;
assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out);
assign avs_byteenable[i] = byteen_out;
assign avs_writedata[i] = data_out;
assign avs_read[i] = buf_valid_out && ~buf_rw_out;
assign avs_write[i] = buf_valid_out && buf_rw_out;
assign avs_address[i] = ADDR_WIDTH_OUT'(buf_addr_out);
assign avs_byteenable[i] = buf_byteen_out;
assign avs_writedata[i] = buf_data_out;
assign avs_burstcount[i] = BURST_WIDTH'(1);
assign ready_out = ~avs_waitrequest[i];
assign bank_req_ready[i] = ready_out_w && ~req_queue_going_full[i];
assign buf_ready_out = ~avs_waitrequest[i];
end
assign mem_req_ready = bank_req_ready[req_bank_sel];
// Responses handling /////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_valid_in;
wire [NUM_PORTS_OUT-1:0][RSP_DATAW-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rd_rsp_sel_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out;
wire [NUM_BANKS-1:0] rsp_queue_empty;
wire [NUM_PORTS_IN-1:0] rd_rsp_valid_out;
wire [NUM_PORTS_IN-1:0][RSP_DATAW-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0] rd_rsp_ready_out;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_rd_rsp_queues
wire [DATA_WIDTH-1:0] rd_rsp_queue_data_out;
wire rd_rsp_queue_empty;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_rsp_queues
VX_fifo_queue #(
.DATAW (DATA_WIDTH),
.DEPTH (RD_QUEUE_SIZE)
@ -183,39 +257,51 @@ module VX_avs_adapter #(
.clk (clk),
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (req_queue_pop[i]),
.pop (rd_req_queue_pop[i]),
.data_in (avs_readdata[i]),
.data_out (rsp_queue_data_out[i]),
.empty (rsp_queue_empty[i]),
.data_out (rd_rsp_queue_data_out),
.empty (rd_rsp_queue_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
assign rd_rsp_valid_in[i] = ~rd_rsp_queue_empty;
assign rd_rsp_data_in[i] = {rd_rsp_queue_data_out, rd_req_queue_data_out[i][NUM_PORTS_IN_BITS +: TAG_WIDTH]};
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign rd_rsp_sel_in[i] = rd_req_queue_data_out[i][0 +: NUM_PORTS_IN_BITS];
end else begin : g_no_input_sel
assign rd_rsp_sel_in[i] = 0;
end
assign rd_req_queue_pop[i] = rd_rsp_valid_in[i] && rd_rsp_ready_in[i];
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rsp_arbs
assign rsp_arb_valid_in[i] = ~rsp_queue_empty[i];
assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};
assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
end
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH),
.ARBITER ("R"),
VX_stream_xbar #(
.NUM_INPUTS (NUM_PORTS_OUT),
.NUM_OUTPUTS(NUM_PORTS_IN),
.DATAW (RSP_DATAW),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
) rd_rsp_xbar (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.data_out ({mem_rsp_data, mem_rsp_tag}),
.valid_out (mem_rsp_valid),
.ready_out (mem_rsp_ready),
.valid_in (rd_rsp_valid_in),
.data_in (rd_rsp_data_in),
.ready_in (rd_rsp_ready_in),
.sel_in (rd_rsp_sel_in),
.data_out (rd_rsp_data_out),
.valid_out (rd_rsp_valid_out),
.ready_out (rd_rsp_ready_out),
`UNUSED_PIN (collisions),
`UNUSED_PIN (sel_out)
);
// Connect each response crossbar output to the matching memory response port.
// The crossbar payload is RSP_DATAW = DATA_WIDTH + TAG_WIDTH bits wide, laid out
// as {data, tag}, so the tag sits in the low TAG_WIDTH bits.
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rd_rsp_data_out
    // back-pressure from the requester drives the crossbar output ready
    assign rd_rsp_ready_out[i] = mem_rsp_ready[i];
    // low field of the payload: response tag
    assign mem_rsp_tag[i] = rd_rsp_data_out[i][0 +: TAG_WIDTH];
    // high field of the payload: response data
    assign mem_rsp_data[i] = rd_rsp_data_out[i][TAG_WIDTH +: DATA_WIDTH];
    // valid is forwarded unchanged
    assign mem_rsp_valid[i] = rd_rsp_valid_out[i];
end
endmodule
`TRACING_ON

View file

@ -105,21 +105,24 @@ module VX_axi_adapter #(
localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
localparam TAG_BUFFER_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
localparam NEEDED_TAG_WIDTH = TAG_WIDTH_IN + NUM_PORTS_IN_BITS;
localparam RD_TAG_WIDTH = (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) ? TAG_BUFFER_ADDRW : TAG_WIDTH_IN;
localparam RD_FULL_TAG_WIDTH = RD_TAG_WIDTH + PORT_SEL_BITS;
localparam DST_TAG_WIDTH = `MAX(RD_FULL_TAG_WIDTH, TAG_WIDTH_IN);
localparam READ_TAG_WIDTH = (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) ? TAG_BUFFER_ADDRW : TAG_WIDTH_IN;
localparam READ_FULL_TAG_WIDTH = READ_TAG_WIDTH + PORT_SEL_BITS;
localparam WRITE_TAG_WIDTH = `MIN(TAG_WIDTH_IN, TAG_WIDTH_OUT);
localparam DST_TAG_WIDTH = `MAX(READ_FULL_TAG_WIDTH, WRITE_TAG_WIDTH);
localparam ARB_TAG_WIDTH = `MAX(READ_TAG_WIDTH, WRITE_TAG_WIDTH);
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
`STATIC_ASSERT ((TAG_WIDTH_OUT >= DST_TAG_WIDTH), ("invalid output tag width: current=%0d, expected=%0d", TAG_WIDTH_OUT, DST_TAG_WIDTH))
// PORT selection
// Ports selection
wire [NUM_PORTS_IN-1:0][PORT_SEL_WIDTH-1:0] req_port_out_sel;
wire [NUM_PORTS_IN-1:0][PORT_OFFSETW-1:0] req_port_out_off;
if (NUM_PORTS_OUT > 1) begin : g_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr[i]);
if (PORT_INTERLEAVE) begin : g_interleave
if (INTERLEAVE) begin : g_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_SEL_BITS-1:0];
assign req_port_out_off[i] = mem_req_addr_out[PORT_SEL_BITS +: PORT_OFFSETW];
end else begin : g_no_interleave
@ -136,8 +139,8 @@ module VX_axi_adapter #(
// Tag handling logic
wire [NUM_PORTS_IN-1:0] mem_rd_req_tag_ready;
wire [NUM_PORTS_IN-1:0][RD_TAG_WIDTH-1:0] mem_rd_req_tag;
wire [NUM_PORTS_IN-1:0][RD_TAG_WIDTH-1:0] mem_rd_rsp_tag;
wire [NUM_PORTS_IN-1:0][READ_TAG_WIDTH-1:0] mem_rd_req_tag;
wire [NUM_PORTS_IN-1:0][READ_TAG_WIDTH-1:0] mem_rd_rsp_tag;
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_tag_buf
if (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) begin : g_enabled
@ -209,13 +212,10 @@ module VX_axi_adapter #(
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_req
localparam ARB_TAG_WIDTH = `MAX(RD_TAG_WIDTH, TAG_WIDTH_IN);
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
wire [PORT_OFFSETW-1:0] arb_addr_out, buf_addr_r_out, buf_addr_w_out;
wire [ARB_TAG_WIDTH-1:0] arb_tag_out;
wire [TAG_WIDTH_IN-1:0] buf_tag_w_out;
wire [RD_TAG_WIDTH-1:0] buf_tag_r_out;
wire [WRITE_TAG_WIDTH-1:0] buf_tag_w_out;
wire [READ_TAG_WIDTH-1:0] buf_tag_r_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out, buf_sel_out;
wire [DATA_WIDTH-1:0] arb_data_out;
wire [DATA_SIZE-1:0] arb_byteen_out;
@ -261,7 +261,7 @@ module VX_axi_adapter #(
assign m_axi_awvalid_w[i] = arb_valid_out && arb_rw_out && ~m_axi_aw_ack[i];
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + TAG_WIDTH_IN),
.DATAW (PORT_OFFSETW + WRITE_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
@ -270,7 +270,7 @@ module VX_axi_adapter #(
.reset (reset),
.valid_in (m_axi_awvalid_w[i]),
.ready_in (m_axi_awready_w[i]),
.data_in ({arb_addr_out, TAG_WIDTH_IN'(arb_tag_out)}),
.data_in ({arb_addr_out, WRITE_TAG_WIDTH'(arb_tag_out)}),
.data_out ({buf_addr_w_out, buf_tag_w_out}),
.valid_out (m_axi_awvalid[i]),
.ready_out (m_axi_awready[i])
@ -312,7 +312,7 @@ module VX_axi_adapter #(
// AXI read address channel
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + RD_TAG_WIDTH + NUM_PORTS_IN_WIDTH),
.DATAW (PORT_OFFSETW + READ_TAG_WIDTH + NUM_PORTS_IN_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
@ -321,7 +321,7 @@ module VX_axi_adapter #(
.reset (reset),
.valid_in (arb_valid_out && ~arb_rw_out),
.ready_in (m_axi_arready_w),
.data_in ({arb_addr_out, RD_TAG_WIDTH'(arb_tag_out), arb_sel_out}),
.data_in ({arb_addr_out, READ_TAG_WIDTH'(arb_tag_out), arb_sel_out}),
.data_out ({buf_addr_r_out, buf_tag_r_out, buf_sel_out}),
.valid_out (m_axi_arvalid[i]),
.ready_out (m_axi_arready[i])
@ -359,13 +359,13 @@ module VX_axi_adapter #(
// AXI read response channel
wire [NUM_PORTS_OUT-1:0] rd_rsp_valid_in;
wire [NUM_PORTS_OUT-1:0][DATA_WIDTH+RD_TAG_WIDTH-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
wire [NUM_PORTS_OUT-1:0][DATA_WIDTH+READ_TAG_WIDTH-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rd_rsp_sel_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_rd_rsp_data_in
assign rd_rsp_valid_in[i] = m_axi_rvalid[i];
assign rd_rsp_data_in[i] = {m_axi_rdata[i], m_axi_rid[i][NUM_PORTS_IN_BITS +: RD_TAG_WIDTH]};
assign rd_rsp_data_in[i] = {m_axi_rdata[i], m_axi_rid[i][NUM_PORTS_IN_BITS +: READ_TAG_WIDTH]};
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign rd_rsp_sel_in[i] = m_axi_rid[i][0 +: NUM_PORTS_IN_BITS];
end else begin : g_no_input_sel
@ -377,13 +377,13 @@ module VX_axi_adapter #(
end
wire [NUM_PORTS_IN-1:0] rd_rsp_valid_out;
wire [NUM_PORTS_IN-1:0][DATA_WIDTH+RD_TAG_WIDTH-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0][DATA_WIDTH+READ_TAG_WIDTH-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0] rd_rsp_ready_out;
VX_stream_xbar #(
.NUM_INPUTS (NUM_PORTS_OUT),
.NUM_OUTPUTS(NUM_PORTS_IN),
.DATAW (DATA_WIDTH + RD_TAG_WIDTH),
.DATAW (DATA_WIDTH + READ_TAG_WIDTH),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rd_rsp_xbar (

View file

@ -301,7 +301,7 @@ private:
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < IO_COUT_SIZE; i++) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];