rtlsim multibanks
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2024-12-16 22:10:57 -08:00
parent bae24e589c
commit a98d2e24e5
25 changed files with 880 additions and 393 deletions

View file

@ -144,7 +144,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
// Adjust memory data width to match AXI interface
for (genvar i = 0; i < `VX_MEM_PORTS; i++) begin : g_mem_adapter
VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
@ -153,7 +153,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
) mem_data_adapter (
.clk (clk),
.reset (reset),
@ -192,7 +192,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (AXI_NUM_BANKS),
.NUM_BANKS_OUT (AXI_NUM_BANKS),
.INTERLEAVE (0),
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)

View file

@ -517,7 +517,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [`VX_MEM_PORTS-1:0] vx_mem_req_ready_qual;
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_vx_mem_adapter
VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
@ -526,7 +526,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (2)
) vx_mem_adapter (
) vx_mem_data_adapter (
.clk (clk),
.reset (reset),
@ -567,7 +567,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.TAG_WIDTH (CCI_VX_TAG_WIDTH)
) cci_vx_mem_arb_in_if[2]();
VX_mem_adapter #(
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (CCI_DATA_WIDTH),
.DST_DATA_WIDTH (LMEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
@ -576,7 +576,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DST_TAG_WIDTH (CCI_VX_TAG_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) cci_mem_adapter (
) cci_mem_data_adapter (
.clk (clk),
.reset (reset),
@ -632,6 +632,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.bus_in_if (cci_vx_mem_arb_in_if),
.bus_out_if (cci_vx_mem_arb_out_if)
);
`UNUSED_VAR (cci_vx_mem_arb_out_if[0].req_data.flags)
// final merged memory interface
wire mem_req_valid [`VX_MEM_PORTS];
@ -647,23 +648,23 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [AVS_TAG_WIDTH-1:0] mem_rsp_tag [`VX_MEM_PORTS];
wire mem_rsp_ready [`VX_MEM_PORTS];
for (genvar i = 0; i < `VX_MEM_PORTS; ++i) begin : g_mem_bus_if
if (i == 0) begin : g_i0
// assign port0 to CCI/VX arbiter
assign mem_req_valid[0] = cci_vx_mem_arb_out_if[0].req_valid;
assign mem_req_rw[0] = cci_vx_mem_arb_out_if[0].req_data.rw;
assign mem_req_addr[0] = cci_vx_mem_arb_out_if[0].req_data.addr;
assign mem_req_byteen[0]= cci_vx_mem_arb_out_if[0].req_data.byteen;
assign mem_req_data[0] = cci_vx_mem_arb_out_if[0].req_data.data;
assign mem_req_tag[0] = cci_vx_mem_arb_out_if[0].req_data.tag;
assign cci_vx_mem_arb_out_if[0].req_ready = mem_req_ready[0];
assign cci_vx_mem_arb_out_if[0].rsp_valid = mem_rsp_valid[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.data = mem_rsp_data[0];
assign cci_vx_mem_arb_out_if[0].rsp_data.tag = mem_rsp_tag[0];
assign mem_rsp_ready[0] = cci_vx_mem_arb_out_if[0].rsp_ready;
`UNUSED_VAR (cci_vx_mem_arb_out_if[0].req_data.flags)
assign mem_req_valid[i] = cci_vx_mem_arb_out_if[i].req_valid;
assign mem_req_rw[i] = cci_vx_mem_arb_out_if[i].req_data.rw;
assign mem_req_addr[i] = cci_vx_mem_arb_out_if[i].req_data.addr;
assign mem_req_byteen[i]= cci_vx_mem_arb_out_if[i].req_data.byteen;
assign mem_req_data[i] = cci_vx_mem_arb_out_if[i].req_data.data;
assign mem_req_tag[i] = cci_vx_mem_arb_out_if[i].req_data.tag;
assign cci_vx_mem_arb_out_if[i].req_ready = mem_req_ready[i];
assign cci_vx_mem_arb_out_if[i].rsp_valid = mem_rsp_valid[i];
assign cci_vx_mem_arb_out_if[i].rsp_data.data = mem_rsp_data[i];
assign cci_vx_mem_arb_out_if[i].rsp_data.tag = mem_rsp_tag[i];
assign mem_rsp_ready[i] = cci_vx_mem_arb_out_if[i].rsp_ready;
end else begin : g_i
// assign other ports to VX memory bus
for (genvar i = 1; i < `VX_MEM_PORTS; ++i) begin : g_mem_bus_if
assign mem_req_valid[i] = vx_mem_bus_if[i].req_valid;
assign mem_req_rw[i] = vx_mem_bus_if[i].req_data.rw;
assign mem_req_addr[i] = vx_mem_bus_if[i].req_data.addr;
@ -677,6 +678,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
assign vx_mem_bus_if[i].rsp_data.tag = CCI_VX_TAG_WIDTH'(mem_rsp_tag[i]);
assign mem_rsp_ready[i] = vx_mem_bus_if[i].rsp_ready;
end
end
// convert merged memory interface to AVS
VX_avs_adapter #(
@ -685,7 +687,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.ADDR_WIDTH_OUT(LMEM_ADDR_WIDTH),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_PORTS_OUT (NUM_LOCAL_MEM_BANKS),
.NUM_BANKS_OUT (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_TAG_WIDTH),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),

View file

@ -55,32 +55,27 @@ module VX_cache_data #(
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (stall)
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == i);
assign write_mask[i] = write_byteen & {WORD_SIZE{word_en}};
end
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
wire evict = fill || flush;
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes
assign byteen_wren[i] = {LINE_SIZE{init}}
| {LINE_SIZE{evict && evict_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_store
wire [LINE_SIZE-1:0] byteen_wdata = {LINE_SIZE{write}}; // only asserted on writes
wire [LINE_SIZE-1:0] byteen_wren = {LINE_SIZE{init || fill || flush}} | write_mask;
wire byteen_write = ((fill || flush) && ((NUM_WAYS == 1) || (evict_way == i)))
|| (write && tag_matches[i])
|| init;
wire byteen_read = fill || flush;
wire byteen_write = init || write || fill || flush;
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.WRENW (LINE_SIZE * NUM_WAYS),
.DATAW (LINE_SIZE),
.WRENW (LINE_SIZE),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
@ -92,10 +87,12 @@ module VX_cache_data #(
.wren (byteen_wren),
.addr (line_idx),
.wdata (byteen_wdata),
.rdata (byteen_rdata)
.rdata (byteen_rdata[i])
);
end
assign evict_byteen = byteen_rdata[way_idx_r];
end else begin : g_no_dirty_bytes
`UNUSED_VAR (init)
`UNUSED_VAR (flush)
@ -104,32 +101,32 @@ module VX_cache_data #(
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
if (WRITE_ENABLE) begin : g_data_store
// create a single write-enable block ram to reduce area overhead
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
wire line_write;
wire line_read;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
localparam WRENW = WRITE_ENABLE ? LINE_SIZE : 1;
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [WRENW-1:0] line_wren;
if (WRITE_ENABLE) begin : g_wren
assign line_wdata = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren = {LINE_SIZE{fill}} | write_mask;
end else begin : g_no_wren
`UNUSED_VAR (write_word)
`UNUSED_VAR (write_mask)
assign line_wdata = fill_data;
assign line_wren = 1'b1;
end
assign line_read = read || ((fill || flush) && WRITEBACK);
assign line_write = fill || (write && WRITE_ENABLE);
wire line_write = (fill && ((NUM_WAYS == 1) || (evict_way == i)))
|| (write && tag_matches[i] && WRITE_ENABLE);
wire line_read = read || ((fill || flush) && WRITEBACK);
VX_sp_ram #(
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (NUM_WAYS * LINE_SIZE),
.WRENW (WRENW),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
@ -140,35 +137,9 @@ module VX_cache_data #(
.wren (line_wren),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata)
);
end else begin : g_data_store
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_word)
`UNUSED_VAR (word_idx)
`UNUSED_VAR (tag_matches)
// we don't merge the ways into a single block ram due to WREN overhead
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
.clk (clk),
.reset (reset),
.read (read),
.write (fill && fill_way_en),
.wren (1'b1),
.addr (line_idx),
.wdata (fill_data),
.rdata (line_rdata[i])
);
end
end
assign read_data = line_rdata[way_idx_r];

View file

@ -221,7 +221,8 @@ module VX_cache_mshr #(
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) mshr_store (
.clk (clk),
.reset (reset),

View file

@ -118,7 +118,8 @@ module VX_cache_repl #(
.DATAW (LRU_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (LRU_WIDTH),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) plru_store (
.clk (clk),
.reset (reset),
@ -158,7 +159,8 @@ module VX_cache_repl #(
VX_sp_ram #(
.DATAW (WAY_SEL_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.RDW_MODE ("R")
.RDW_MODE ("R"),
.RADDR_REG (1)
) ctr_store (
.clk (clk),
.reset (reset),

View file

@ -88,7 +88,8 @@ module VX_cache_tags #(
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.RDW_MODE ("W")
.RDW_MODE ("W"),
.RADDR_REG (1)
) tag_store (
.clk (clk),
.reset (reset),

View file

@ -121,6 +121,7 @@ module VX_async_ram_patch #(
parameter WRENW = 1,
parameter DUAL_PORT = 0,
parameter FORCE_BRAM = 0,
parameter RADDR_REG = 0, // read address registered hint
parameter WRITE_FIRST = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
@ -154,7 +155,7 @@ module VX_async_ram_patch #(
.out ({raddr_s, read_s, is_raddr_reg})
);
wire [DATAW-1:0] rdata_s, rdata_a;
wire [DATAW-1:0] rdata_s;
if (1) begin : g_sync_ram
if (WRENW != 1) begin : g_wren
@ -204,8 +205,12 @@ module VX_async_ram_patch #(
end
end
if (1) begin : g_async_ram
if (DUAL_PORT != 0) begin : g_dp
if (RADDR_REG) begin : g_raddr_reg
`UNUSED_VAR (is_raddr_reg)
assign rdata = rdata_s;
end else begin : g_async_ram
wire [DATAW-1:0] rdata_a;
if (DUAL_PORT) begin : g_dp
if (WRENW != 1) begin : g_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
@ -250,9 +255,8 @@ module VX_async_ram_patch #(
end
end
end
end
assign rdata = is_raddr_reg ? rdata_s : rdata_a;
end
endmodule
`TRACING_ON

View file

@ -20,7 +20,7 @@ module VX_avs_adapter #(
parameter ADDR_WIDTH_OUT= 32,
parameter BURST_WIDTH = 1,
parameter NUM_PORTS_IN = 1,
parameter NUM_PORTS_OUT = 1,
parameter NUM_BANKS_OUT = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter INTERLEAVE = 0,
@ -47,59 +47,59 @@ module VX_avs_adapter #(
input wire mem_rsp_ready [NUM_PORTS_IN],
// AVS bus
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_PORTS_OUT],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_PORTS_OUT],
input wire avs_waitrequest [NUM_PORTS_OUT],
output wire avs_write [NUM_PORTS_OUT],
output wire avs_read [NUM_PORTS_OUT],
output wire [DATA_WIDTH/8-1:0] avs_byteenable [NUM_PORTS_OUT],
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_PORTS_OUT],
input wire avs_readdatavalid [NUM_PORTS_OUT]
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS_OUT],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS_OUT],
input wire avs_waitrequest [NUM_BANKS_OUT],
output wire avs_write [NUM_BANKS_OUT],
output wire avs_read [NUM_BANKS_OUT],
output wire [DATA_WIDTH/8-1:0] avs_byteenable [NUM_BANKS_OUT],
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS_OUT],
input wire avs_readdatavalid [NUM_BANKS_OUT]
);
localparam DATA_SIZE = DATA_WIDTH/8;
localparam PORT_SEL_BITS = `CLOG2(NUM_PORTS_OUT);
localparam PORT_SEL_WIDTH = `UP(PORT_SEL_BITS);
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + PORT_SEL_BITS; // to input space
localparam PORT_OFFSETW = DST_ADDR_WDITH - PORT_SEL_BITS;
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS_OUT);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // convert output address to input space
localparam BANK_ADDR_WIDTH = DST_ADDR_WDITH - BANK_SEL_BITS;
localparam NUM_PORTS_IN_BITS = `CLOG2(NUM_PORTS_IN);
localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
localparam REQ_QUEUE_DATAW = TAG_WIDTH + NUM_PORTS_IN_BITS;
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_WIDTH + DATA_SIZE + TAG_WIDTH;
localparam RSP_DATAW = DATA_WIDTH + TAG_WIDTH;
localparam ARB_DATAW = 1 + BANK_ADDR_WIDTH + DATA_WIDTH + DATA_SIZE + TAG_WIDTH;
localparam RSP_XBAR_DATAW = DATA_WIDTH + TAG_WIDTH;
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
// Ports selection
// Banks selection
wire [NUM_PORTS_IN-1:0][PORT_SEL_WIDTH-1:0] req_port_out_sel;
wire [NUM_PORTS_IN-1:0][PORT_OFFSETW-1:0] req_port_out_off;
wire [NUM_PORTS_IN-1:0][BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [NUM_PORTS_IN-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
if (NUM_PORTS_OUT > 1) begin : g_port_sel
if (NUM_BANKS_OUT > 1) begin : g_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr[i]);
wire [DST_ADDR_WDITH-1:0] mem_req_addr_dst = DST_ADDR_WDITH'(mem_req_addr[i]);
if (INTERLEAVE) begin : g_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_SEL_BITS-1:0];
assign req_port_out_off[i] = mem_req_addr_out[PORT_SEL_BITS +: PORT_OFFSETW];
assign req_bank_sel[i] = mem_req_addr_dst[BANK_SEL_BITS-1:0];
assign req_bank_addr[i] = mem_req_addr_dst[BANK_SEL_BITS +: BANK_ADDR_WIDTH];
end else begin : g_no_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_OFFSETW +: PORT_SEL_BITS];
assign req_port_out_off[i] = mem_req_addr_out[PORT_OFFSETW-1:0];
assign req_bank_sel[i] = mem_req_addr_dst[BANK_ADDR_WIDTH +: BANK_SEL_BITS];
assign req_bank_addr[i] = mem_req_addr_dst[BANK_ADDR_WIDTH-1:0];
end
end
end else begin : g_no_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
assign req_port_out_sel[i] = '0;
assign req_port_out_off[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
assign req_bank_sel[i] = '0;
assign req_bank_addr[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
end
end
// Request ack
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
wire [NUM_PORTS_IN-1:0][NUM_PORTS_OUT-1:0] arb_ready_in_w;
wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
wire [NUM_PORTS_IN-1:0][NUM_BANKS_OUT-1:0] arb_ready_in_w;
VX_transpose #(
.N (NUM_PORTS_OUT),
.N (NUM_BANKS_OUT),
.M (NUM_PORTS_IN)
) rdy_in_transpose (
.data_in (arb_ready_in),
@ -112,12 +112,12 @@ module VX_avs_adapter #(
// Request handling ///////////////////////////////////////////////////////
wire [NUM_PORTS_OUT-1:0][REQ_QUEUE_DATAW-1:0] rd_req_queue_data_out;
wire [NUM_PORTS_OUT-1:0] rd_req_queue_pop;
wire [NUM_BANKS_OUT-1:0][REQ_QUEUE_DATAW-1:0] rd_req_queue_data_out;
wire [NUM_BANKS_OUT-1:0] rd_req_queue_pop;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_requests
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_requests
wire [PORT_OFFSETW-1:0] arb_addr_out;
wire [BANK_ADDR_WIDTH-1:0] arb_addr_out;
wire [TAG_WIDTH-1:0] arb_tag_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out;
wire [DATA_WIDTH-1:0] arb_data_out;
@ -129,11 +129,11 @@ module VX_avs_adapter #(
wire [NUM_PORTS_IN-1:0] arb_valid_in;
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_valid_in
assign arb_valid_in[j] = mem_req_valid[j] && (req_port_out_sel[j] == i);
assign arb_valid_in[j] = mem_req_valid[j] && (req_bank_sel[j] == i);
end
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_data_in
assign arb_data_in[j] = {mem_req_rw[j], req_port_out_off[j], mem_req_byteen[j], mem_req_data[j], mem_req_tag[j]};
assign arb_data_in[j] = {mem_req_rw[j], req_bank_addr[j], mem_req_byteen[j], mem_req_data[j], mem_req_tag[j]};
end
VX_stream_arb #(
@ -200,7 +200,7 @@ module VX_avs_adapter #(
wire buf_valid_out;
wire buf_rw_out;
wire [DATA_SIZE-1:0] buf_byteen_out;
wire [PORT_OFFSETW-1:0] buf_addr_out;
wire [BANK_ADDR_WIDTH-1:0] buf_addr_out;
wire [DATA_WIDTH-1:0] buf_data_out;
wire buf_ready_out;
@ -211,7 +211,7 @@ module VX_avs_adapter #(
assign arb_ready_out = arb_ready_out_w && rd_req_queue_ready;
VX_elastic_buffer #(
.DATAW (1 + DATA_SIZE + PORT_OFFSETW + DATA_WIDTH),
.DATAW (1 + DATA_SIZE + BANK_ADDR_WIDTH + DATA_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
) req_buf (
@ -236,71 +236,71 @@ module VX_avs_adapter #(
// Responses handling /////////////////////////////////////////////////////
wire [NUM_PORTS_OUT-1:0] rd_rsp_valid_in;
wire [NUM_PORTS_OUT-1:0][RSP_DATAW-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rd_rsp_sel_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
wire [NUM_BANKS_OUT-1:0] rsp_xbar_valid_in;
wire [NUM_BANKS_OUT-1:0][RSP_XBAR_DATAW-1:0] rsp_xbar_data_in;
wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rsp_xbar_sel_in;
wire [NUM_BANKS_OUT-1:0] rsp_xbar_ready_in;
wire [NUM_PORTS_IN-1:0] rd_rsp_valid_out;
wire [NUM_PORTS_IN-1:0][RSP_DATAW-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0] rd_rsp_ready_out;
wire [NUM_PORTS_IN-1:0] rsp_xbar_valid_out;
wire [NUM_PORTS_IN-1:0][RSP_XBAR_DATAW-1:0] rsp_xbar_data_out;
wire [NUM_PORTS_IN-1:0] rsp_xbar_ready_out;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_rd_rsp_queues
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_rsp_queues
wire [DATA_WIDTH-1:0] rd_rsp_queue_data_out;
wire rd_rsp_queue_empty;
wire [DATA_WIDTH-1:0] rsp_queue_data_out;
wire rsp_queue_empty;
VX_fifo_queue #(
.DATAW (DATA_WIDTH),
.DEPTH (RD_QUEUE_SIZE)
) rd_rsp_queue (
) rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (rd_req_queue_pop[i]),
.data_in (avs_readdata[i]),
.data_out (rd_rsp_queue_data_out),
.empty (rd_rsp_queue_empty),
.data_out (rsp_queue_data_out),
.empty (rsp_queue_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
assign rd_rsp_valid_in[i] = ~rd_rsp_queue_empty;
assign rd_rsp_data_in[i] = {rd_rsp_queue_data_out, rd_req_queue_data_out[i][NUM_PORTS_IN_BITS +: TAG_WIDTH]};
assign rsp_xbar_valid_in[i] = ~rsp_queue_empty;
assign rsp_xbar_data_in[i] = {rsp_queue_data_out, rd_req_queue_data_out[i][NUM_PORTS_IN_BITS +: TAG_WIDTH]};
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign rd_rsp_sel_in[i] = rd_req_queue_data_out[i][0 +: NUM_PORTS_IN_BITS];
assign rsp_xbar_sel_in[i] = rd_req_queue_data_out[i][0 +: NUM_PORTS_IN_BITS];
end else begin : g_no_input_sel
assign rd_rsp_sel_in[i] = 0;
assign rsp_xbar_sel_in[i] = 0;
end
assign rd_req_queue_pop[i] = rd_rsp_valid_in[i] && rd_rsp_ready_in[i];
assign rd_req_queue_pop[i] = rsp_xbar_valid_in[i] && rsp_xbar_ready_in[i];
end
VX_stream_xbar #(
.NUM_INPUTS (NUM_PORTS_OUT),
.NUM_INPUTS (NUM_BANKS_OUT),
.NUM_OUTPUTS(NUM_PORTS_IN),
.DATAW (RSP_DATAW),
.DATAW (RSP_XBAR_DATAW),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rd_rsp_xbar (
) rsp_xbar (
.clk (clk),
.reset (reset),
.valid_in (rd_rsp_valid_in),
.data_in (rd_rsp_data_in),
.ready_in (rd_rsp_ready_in),
.sel_in (rd_rsp_sel_in),
.data_out (rd_rsp_data_out),
.valid_out (rd_rsp_valid_out),
.ready_out (rd_rsp_ready_out),
.valid_in (rsp_xbar_valid_in),
.data_in (rsp_xbar_data_in),
.ready_in (rsp_xbar_ready_in),
.sel_in (rsp_xbar_sel_in),
.data_out (rsp_xbar_data_out),
.valid_out (rsp_xbar_valid_out),
.ready_out (rsp_xbar_ready_out),
`UNUSED_PIN (collisions),
`UNUSED_PIN (sel_out)
);
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rd_rsp_data_out
assign mem_rsp_valid[i] = rd_rsp_valid_out[i];
assign {mem_rsp_data[i], mem_rsp_tag[i]} = rd_rsp_data_out[i];
assign rd_rsp_ready_out[i] = mem_rsp_ready[i];
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rsp_xbar_data_out
assign mem_rsp_valid[i] = rsp_xbar_valid_out[i];
assign {mem_rsp_data[i], mem_rsp_tag[i]} = rsp_xbar_data_out[i];
assign rsp_xbar_ready_out[i] = mem_rsp_ready[i];
end
endmodule

View file

@ -21,7 +21,7 @@ module VX_axi_adapter #(
parameter TAG_WIDTH_IN = 8,
parameter TAG_WIDTH_OUT = 8,
parameter NUM_PORTS_IN = 1,
parameter NUM_PORTS_OUT = 1,
parameter NUM_BANKS_OUT = 1,
parameter INTERLEAVE = 0,
parameter TAG_BUFFER_SIZE= 32,
parameter ARBITER = "R",
@ -48,92 +48,93 @@ module VX_axi_adapter #(
input wire mem_rsp_ready [NUM_PORTS_IN],
// AXI write request address channel
output wire m_axi_awvalid [NUM_PORTS_OUT],
input wire m_axi_awready [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_PORTS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_PORTS_OUT],
output wire [7:0] m_axi_awlen [NUM_PORTS_OUT],
output wire [2:0] m_axi_awsize [NUM_PORTS_OUT],
output wire [1:0] m_axi_awburst [NUM_PORTS_OUT],
output wire [1:0] m_axi_awlock [NUM_PORTS_OUT],
output wire [3:0] m_axi_awcache [NUM_PORTS_OUT],
output wire [2:0] m_axi_awprot [NUM_PORTS_OUT],
output wire [3:0] m_axi_awqos [NUM_PORTS_OUT],
output wire [3:0] m_axi_awregion [NUM_PORTS_OUT],
output wire m_axi_awvalid [NUM_BANKS_OUT],
input wire m_axi_awready [NUM_BANKS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS_OUT],
output wire [7:0] m_axi_awlen [NUM_BANKS_OUT],
output wire [2:0] m_axi_awsize [NUM_BANKS_OUT],
output wire [1:0] m_axi_awburst [NUM_BANKS_OUT],
output wire [1:0] m_axi_awlock [NUM_BANKS_OUT],
output wire [3:0] m_axi_awcache [NUM_BANKS_OUT],
output wire [2:0] m_axi_awprot [NUM_BANKS_OUT],
output wire [3:0] m_axi_awqos [NUM_BANKS_OUT],
output wire [3:0] m_axi_awregion [NUM_BANKS_OUT],
// AXI write request data channel
output wire m_axi_wvalid [NUM_PORTS_OUT],
input wire m_axi_wready [NUM_PORTS_OUT],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_PORTS_OUT],
output wire [DATA_SIZE-1:0] m_axi_wstrb [NUM_PORTS_OUT],
output wire m_axi_wlast [NUM_PORTS_OUT],
output wire m_axi_wvalid [NUM_BANKS_OUT],
input wire m_axi_wready [NUM_BANKS_OUT],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS_OUT],
output wire [DATA_SIZE-1:0] m_axi_wstrb [NUM_BANKS_OUT],
output wire m_axi_wlast [NUM_BANKS_OUT],
// AXI write response channel
input wire m_axi_bvalid [NUM_PORTS_OUT],
output wire m_axi_bready [NUM_PORTS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_PORTS_OUT],
input wire [1:0] m_axi_bresp [NUM_PORTS_OUT],
input wire m_axi_bvalid [NUM_BANKS_OUT],
output wire m_axi_bready [NUM_BANKS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS_OUT],
input wire [1:0] m_axi_bresp [NUM_BANKS_OUT],
// AXI read address channel
output wire m_axi_arvalid [NUM_PORTS_OUT],
input wire m_axi_arready [NUM_PORTS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_PORTS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_PORTS_OUT],
output wire [7:0] m_axi_arlen [NUM_PORTS_OUT],
output wire [2:0] m_axi_arsize [NUM_PORTS_OUT],
output wire [1:0] m_axi_arburst [NUM_PORTS_OUT],
output wire [1:0] m_axi_arlock [NUM_PORTS_OUT],
output wire [3:0] m_axi_arcache [NUM_PORTS_OUT],
output wire [2:0] m_axi_arprot [NUM_PORTS_OUT],
output wire [3:0] m_axi_arqos [NUM_PORTS_OUT],
output wire [3:0] m_axi_arregion [NUM_PORTS_OUT],
output wire m_axi_arvalid [NUM_BANKS_OUT],
input wire m_axi_arready [NUM_BANKS_OUT],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS_OUT],
output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS_OUT],
output wire [7:0] m_axi_arlen [NUM_BANKS_OUT],
output wire [2:0] m_axi_arsize [NUM_BANKS_OUT],
output wire [1:0] m_axi_arburst [NUM_BANKS_OUT],
output wire [1:0] m_axi_arlock [NUM_BANKS_OUT],
output wire [3:0] m_axi_arcache [NUM_BANKS_OUT],
output wire [2:0] m_axi_arprot [NUM_BANKS_OUT],
output wire [3:0] m_axi_arqos [NUM_BANKS_OUT],
output wire [3:0] m_axi_arregion [NUM_BANKS_OUT],
// AXI read response channel
input wire m_axi_rvalid [NUM_PORTS_OUT],
output wire m_axi_rready [NUM_PORTS_OUT],
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_PORTS_OUT],
input wire m_axi_rlast [NUM_PORTS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_PORTS_OUT],
input wire [1:0] m_axi_rresp [NUM_PORTS_OUT]
input wire m_axi_rvalid [NUM_BANKS_OUT],
output wire m_axi_rready [NUM_BANKS_OUT],
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS_OUT],
input wire m_axi_rlast [NUM_BANKS_OUT],
input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_BANKS_OUT],
input wire [1:0] m_axi_rresp [NUM_BANKS_OUT]
);
localparam LOG2_DATA_SIZE = `CLOG2(DATA_SIZE);
localparam PORT_SEL_BITS = `CLOG2(NUM_PORTS_OUT);
localparam PORT_SEL_WIDTH = `UP(PORT_SEL_BITS);
localparam DST_ADDR_WDITH = (ADDR_WIDTH_OUT - LOG2_DATA_SIZE) + PORT_SEL_BITS; // convert output addresss to byte-addressable input space
localparam PORT_OFFSETW = DST_ADDR_WDITH - PORT_SEL_BITS;
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS_OUT);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam DST_ADDR_WDITH = (ADDR_WIDTH_OUT - LOG2_DATA_SIZE) + BANK_SEL_BITS; // convert output address to byte-addressable input space
localparam BANK_ADDR_WIDTH = DST_ADDR_WDITH - BANK_SEL_BITS;
localparam NUM_PORTS_IN_BITS = `CLOG2(NUM_PORTS_IN);
localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
localparam TAG_BUFFER_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
localparam NEEDED_TAG_WIDTH = TAG_WIDTH_IN + NUM_PORTS_IN_BITS;
localparam READ_TAG_WIDTH = (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) ? TAG_BUFFER_ADDRW : TAG_WIDTH_IN;
localparam READ_FULL_TAG_WIDTH = READ_TAG_WIDTH + PORT_SEL_BITS;
localparam READ_FULL_TAG_WIDTH = READ_TAG_WIDTH + NUM_PORTS_IN_BITS;
localparam WRITE_TAG_WIDTH = `MIN(TAG_WIDTH_IN, TAG_WIDTH_OUT);
localparam DST_TAG_WIDTH = `MAX(READ_FULL_TAG_WIDTH, WRITE_TAG_WIDTH);
localparam ARB_TAG_WIDTH = `MAX(READ_TAG_WIDTH, WRITE_TAG_WIDTH);
localparam ARB_DATAW = 1 + PORT_OFFSETW + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
localparam ARB_DATAW = 1 + BANK_ADDR_WIDTH + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
localparam RSP_XBAR_DATAW = DATA_WIDTH + READ_TAG_WIDTH;
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
`STATIC_ASSERT ((TAG_WIDTH_OUT >= DST_TAG_WIDTH), ("invalid output tag width: current=%0d, expected=%0d", TAG_WIDTH_OUT, DST_TAG_WIDTH))
// Ports selection
wire [NUM_PORTS_IN-1:0][PORT_SEL_WIDTH-1:0] req_port_out_sel;
wire [NUM_PORTS_IN-1:0][PORT_OFFSETW-1:0] req_port_out_off;
// Banks selection
wire [NUM_PORTS_IN-1:0][BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [NUM_PORTS_IN-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
if (NUM_PORTS_OUT > 1) begin : g_port_sel
if (NUM_BANKS_OUT > 1) begin : g_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr[i]);
if (INTERLEAVE) begin : g_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_SEL_BITS-1:0];
assign req_port_out_off[i] = mem_req_addr_out[PORT_SEL_BITS +: PORT_OFFSETW];
assign req_bank_sel[i] = mem_req_addr_out[BANK_SEL_BITS-1:0];
assign req_bank_addr[i] = mem_req_addr_out[BANK_SEL_BITS +: BANK_ADDR_WIDTH];
end else begin : g_no_interleave
assign req_port_out_sel[i] = mem_req_addr_out[PORT_OFFSETW +: PORT_SEL_BITS];
assign req_port_out_off[i] = mem_req_addr_out[PORT_OFFSETW-1:0];
assign req_bank_sel[i] = mem_req_addr_out[BANK_ADDR_WIDTH +: BANK_SEL_BITS];
assign req_bank_addr[i] = mem_req_addr_out[BANK_ADDR_WIDTH-1:0];
end
end
end else begin : g_no_port_sel
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
assign req_port_out_sel[i] = '0;
assign req_port_out_off[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
assign req_bank_sel[i] = '0;
assign req_bank_addr[i] = DST_ADDR_WDITH'(mem_req_addr[i]);
end
end
@ -172,11 +173,11 @@ module VX_axi_adapter #(
end
// AXI write request synchronization
wire [NUM_PORTS_OUT-1:0] m_axi_awvalid_w, m_axi_wvalid_w;
wire [NUM_PORTS_OUT-1:0] m_axi_awready_w, m_axi_wready_w;
reg [NUM_PORTS_OUT-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
wire [NUM_BANKS_OUT-1:0] m_axi_awvalid_w, m_axi_wvalid_w;
wire [NUM_BANKS_OUT-1:0] m_axi_awready_w, m_axi_wready_w;
reg [NUM_BANKS_OUT-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_ready
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_axi_write_ready
VX_axi_write_ack axi_write_ack (
.clk (clk),
.reset (reset),
@ -193,11 +194,11 @@ module VX_axi_adapter #(
// Request ack
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
wire [NUM_PORTS_IN-1:0][NUM_PORTS_OUT-1:0] arb_ready_in_w;
wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
wire [NUM_PORTS_IN-1:0][NUM_BANKS_OUT-1:0] arb_ready_in_w;
VX_transpose #(
.N (NUM_PORTS_OUT),
.N (NUM_BANKS_OUT),
.M (NUM_PORTS_IN)
) rdy_in_transpose (
.data_in (arb_ready_in),
@ -210,13 +211,13 @@ module VX_axi_adapter #(
// AXI request handling
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_req
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_axi_write_req
wire [PORT_OFFSETW-1:0] arb_addr_out, buf_addr_r_out, buf_addr_w_out;
wire [BANK_ADDR_WIDTH-1:0] arb_addr_out, buf_addr_r_out, buf_addr_w_out;
wire [ARB_TAG_WIDTH-1:0] arb_tag_out;
wire [WRITE_TAG_WIDTH-1:0] buf_tag_w_out;
wire [READ_TAG_WIDTH-1:0] buf_tag_r_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out, buf_sel_out;
wire [READ_FULL_TAG_WIDTH-1:0] arb_tag_r_out, buf_tag_r_out;
wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out;
wire [DATA_WIDTH-1:0] arb_data_out;
wire [DATA_SIZE-1:0] arb_byteen_out;
wire arb_valid_out, arb_ready_out;
@ -227,12 +228,12 @@ module VX_axi_adapter #(
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_valid_in
wire tag_ready = mem_req_rw[j] || mem_rd_req_tag_ready[j];
assign arb_valid_in[j] = mem_req_valid[j] && tag_ready && (req_port_out_sel[j] == i);
assign arb_valid_in[j] = mem_req_valid[j] && tag_ready && (req_bank_sel[j] == i);
end
for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_data_in
wire [ARB_TAG_WIDTH-1:0] tag_value = mem_req_rw[j] ? ARB_TAG_WIDTH'(mem_req_tag[j]) : ARB_TAG_WIDTH'(mem_rd_req_tag[j]);
assign arb_data_in[j] = {mem_req_rw[j], req_port_out_off[j], mem_req_byteen[j], mem_req_data[j], tag_value};
assign arb_data_in[j] = {mem_req_rw[j], req_bank_addr[j], mem_req_byteen[j], mem_req_data[j], tag_value};
end
VX_stream_arb #(
@ -261,7 +262,7 @@ module VX_axi_adapter #(
assign m_axi_awvalid_w[i] = arb_valid_out && arb_rw_out && ~m_axi_aw_ack[i];
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + WRITE_TAG_WIDTH),
.DATAW (BANK_ADDR_WIDTH + WRITE_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
@ -311,8 +312,15 @@ module VX_axi_adapter #(
// AXI read address channel
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign arb_tag_r_out = READ_FULL_TAG_WIDTH'({arb_tag_out, arb_sel_out});
end else begin : g_no_input_sel
`UNUSED_VAR (arb_sel_out)
assign arb_tag_r_out = READ_TAG_WIDTH'(arb_tag_out);
end
VX_elastic_buffer #(
.DATAW (PORT_OFFSETW + READ_TAG_WIDTH + NUM_PORTS_IN_WIDTH),
.DATAW (BANK_ADDR_WIDTH + READ_FULL_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
.LUTRAM (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
@ -321,21 +329,14 @@ module VX_axi_adapter #(
.reset (reset),
.valid_in (arb_valid_out && ~arb_rw_out),
.ready_in (m_axi_arready_w),
.data_in ({arb_addr_out, READ_TAG_WIDTH'(arb_tag_out), arb_sel_out}),
.data_out ({buf_addr_r_out, buf_tag_r_out, buf_sel_out}),
.data_in ({arb_addr_out, arb_tag_r_out}),
.data_out ({buf_addr_r_out, buf_tag_r_out}),
.valid_out (m_axi_arvalid[i]),
.ready_out (m_axi_arready[i])
);
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(buf_addr_r_out) << LOG2_DATA_SIZE;
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign m_axi_arid[i] = TAG_WIDTH_OUT'({buf_tag_r_out, buf_sel_out});
end else begin : g_no_input_sel
`UNUSED_VAR (buf_sel_out)
assign m_axi_arid[i] = TAG_WIDTH_OUT'(buf_tag_r_out);
end
assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(LOG2_DATA_SIZE);
assign m_axi_arburst[i] = 2'b00;
@ -348,7 +349,7 @@ module VX_axi_adapter #(
// AXI write response channel (ignore)
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_axi_write_rsp
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_axi_write_rsp
`UNUSED_VAR (m_axi_bvalid[i])
`UNUSED_VAR (m_axi_bid[i])
`UNUSED_VAR (m_axi_bresp[i])
@ -358,56 +359,52 @@ module VX_axi_adapter #(
// AXI read response channel
wire [NUM_PORTS_OUT-1:0] rd_rsp_valid_in;
wire [NUM_PORTS_OUT-1:0][DATA_WIDTH+READ_TAG_WIDTH-1:0] rd_rsp_data_in;
wire [NUM_PORTS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rd_rsp_sel_in;
wire [NUM_PORTS_OUT-1:0] rd_rsp_ready_in;
wire [NUM_BANKS_OUT-1:0] rsp_xbar_valid_in;
wire [NUM_BANKS_OUT-1:0][RSP_XBAR_DATAW-1:0] rsp_xbar_data_in;
wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rsp_xbar_sel_in;
wire [NUM_BANKS_OUT-1:0] rsp_xbar_ready_in;
for (genvar i = 0; i < NUM_PORTS_OUT; ++i) begin : g_rd_rsp_data_in
assign rd_rsp_valid_in[i] = m_axi_rvalid[i];
assign rd_rsp_data_in[i] = {m_axi_rdata[i], m_axi_rid[i][NUM_PORTS_IN_BITS +: READ_TAG_WIDTH]};
for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_rsp_xbar_data_in
assign rsp_xbar_valid_in[i] = m_axi_rvalid[i];
assign rsp_xbar_data_in[i] = {m_axi_rdata[i], m_axi_rid[i][NUM_PORTS_IN_BITS +: READ_TAG_WIDTH]};
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign rd_rsp_sel_in[i] = m_axi_rid[i][0 +: NUM_PORTS_IN_BITS];
assign rsp_xbar_sel_in[i] = m_axi_rid[i][0 +: NUM_PORTS_IN_BITS];
end else begin : g_no_input_sel
assign rd_rsp_sel_in[i] = 0;
assign rsp_xbar_sel_in[i] = 0;
end
assign m_axi_rready[i] = rd_rsp_ready_in[i];
assign m_axi_rready[i] = rsp_xbar_ready_in[i];
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: *** AXI response error", $time))
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time))
end
wire [NUM_PORTS_IN-1:0] rd_rsp_valid_out;
wire [NUM_PORTS_IN-1:0][DATA_WIDTH+READ_TAG_WIDTH-1:0] rd_rsp_data_out;
wire [NUM_PORTS_IN-1:0] rd_rsp_ready_out;
wire [NUM_PORTS_IN-1:0] rsp_xbar_valid_out;
wire [NUM_PORTS_IN-1:0][DATA_WIDTH+READ_TAG_WIDTH-1:0] rsp_xbar_data_out;
wire [NUM_PORTS_IN-1:0] rsp_xbar_ready_out;
VX_stream_xbar #(
.NUM_INPUTS (NUM_PORTS_OUT),
.NUM_INPUTS (NUM_BANKS_OUT),
.NUM_OUTPUTS(NUM_PORTS_IN),
.DATAW (DATA_WIDTH + READ_TAG_WIDTH),
.DATAW (RSP_XBAR_DATAW),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rd_rsp_xbar (
) rsp_xbar (
.clk (clk),
.reset (reset),
.valid_in (rd_rsp_valid_in),
.data_in (rd_rsp_data_in),
.ready_in (rd_rsp_ready_in),
.sel_in (rd_rsp_sel_in),
.data_out (rd_rsp_data_out),
.valid_out (rd_rsp_valid_out),
.ready_out (rd_rsp_ready_out),
.valid_in (rsp_xbar_valid_in),
.data_in (rsp_xbar_data_in),
.ready_in (rsp_xbar_ready_in),
.sel_in (rsp_xbar_sel_in),
.data_out (rsp_xbar_data_out),
.valid_out (rsp_xbar_valid_out),
.ready_out (rsp_xbar_ready_out),
`UNUSED_PIN (collisions),
`UNUSED_PIN (sel_out)
);
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rd_rsp_data_out
assign mem_rsp_valid[i] = rd_rsp_valid_out[i];
if (NUM_PORTS_IN > 1) begin : g_input_sel
assign {mem_rsp_data[i], mem_rd_rsp_tag[i]} = rd_rsp_data_out[i];
end else begin : g_no_input_sel
assign {mem_rsp_data[i], mem_rd_rsp_tag[i]} = rd_rsp_data_out[i];
end
assign rd_rsp_ready_out[i] = mem_rsp_ready[i];
for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rsp_xbar_data_out
assign mem_rsp_valid[i] = rsp_xbar_valid_out[i];
assign {mem_rsp_data[i], mem_rd_rsp_tag[i]} = rsp_xbar_data_out[i];
assign rsp_xbar_ready_out[i] = mem_rsp_ready[i];
end
endmodule

View file

@ -50,6 +50,7 @@ module VX_dp_ram #(
parameter OUT_REG = 0,
parameter LUTRAM = 0,
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, U: undefined
parameter RADDR_REG = 0, // read address registered hint
parameter RDW_ASSERT = 0,
parameter RESET_RAM = 0,
parameter INIT_ENABLE = 0,
@ -69,6 +70,7 @@ module VX_dp_ram #(
);
localparam WSELW = DATAW / WRENW;
`UNUSED_PARAM (LUTRAM)
`UNUSED_PARAM (RADDR_REG)
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter"))
@ -134,7 +136,7 @@ module VX_dp_ram #(
end
assign rdata = rdata_r;
end
end else begin : g_undefined
end else if (RDW_MODE == "U") begin : g_undefined
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -220,7 +222,7 @@ module VX_dp_ram #(
end
assign rdata = rdata_r;
end
end else begin : g_undefined
end else if (RDW_MODE == "U") begin : g_undefined
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -260,6 +262,7 @@ module VX_dp_ram #(
.WRENW (WRENW),
.DUAL_PORT (1),
.FORCE_BRAM (FORCE_BRAM),
.RADDR_REG (RADDR_REG),
.WRITE_FIRST(RDW_MODE == "W"),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),

View file

@ -94,7 +94,8 @@ module VX_fifo_queue #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM),
.RDW_MODE ("W")
.RDW_MODE ("W"),
.RADDR_REG (1)
) dp_ram (
.clk (clk),
.reset (reset),

View file

@ -0,0 +1,283 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// VX_mem_bank_adapter
//
// Routes memory requests from NUM_PORTS_IN input ports onto NUM_BANKS_OUT
// output banks and routes the responses back to the originating port.
//
//  * Bank selection: the input address is widened to DST_ADDR_WIDTH and split
//    into a bank index and a bank-local address. With INTERLEAVE != 0 the low
//    address bits select the bank; otherwise the high bits do.
//  * Request path: each bank has one arbiter over all input ports, followed by
//    an elastic buffer that drives the bank's request interface.
//  * Tags: for NUM_PORTS_IN > 1 the arbiter's port index is appended below the
//    tag so that the response can be steered back. When the input tag plus the
//    port index does not fit in TAG_WIDTH_OUT, read tags are parked in a
//    per-port VX_index_buffer and the buffer index is sent downstream instead.
//  * Response path: a crossbar steers each bank response to the input port
//    encoded in the low bits of the returned tag.
//
// NOTE(review): a tag-buffer entry is released on every accepted response, so
// the downstream memory is presumably expected to answer reads only - confirm.
module VX_mem_bank_adapter #(
    parameter DATA_WIDTH     = 512,
    parameter ADDR_WIDTH_IN  = 26, // word-addressable (input address space)
    parameter ADDR_WIDTH_OUT = 32, // bank-local address, same unit as the input (no byte shift is applied here)
    parameter TAG_WIDTH_IN   = 8,
    parameter TAG_WIDTH_OUT  = 8,
    parameter NUM_PORTS_IN   = 1,
    parameter NUM_BANKS_OUT  = 1,
    parameter INTERLEAVE     = 0,  // non-zero: bank index taken from the low address bits
    parameter TAG_BUFFER_SIZE= 32, // entries of each per-port read tag buffer
    parameter ARBITER        = "R",
    parameter REQ_OUT_BUF    = 1,
    parameter RSP_OUT_BUF    = 1,
    parameter DATA_SIZE      = DATA_WIDTH/8
) (
    input wire clk,
    input wire reset,

    // Input request
    input wire                       mem_req_valid_in [NUM_PORTS_IN],
    input wire                       mem_req_rw_in [NUM_PORTS_IN],
    input wire [DATA_SIZE-1:0]       mem_req_byteen_in [NUM_PORTS_IN],
    input wire [ADDR_WIDTH_IN-1:0]   mem_req_addr_in [NUM_PORTS_IN],
    input wire [DATA_WIDTH-1:0]      mem_req_data_in [NUM_PORTS_IN],
    input wire [TAG_WIDTH_IN-1:0]    mem_req_tag_in [NUM_PORTS_IN],
    output wire                      mem_req_ready_in [NUM_PORTS_IN],

    // Input response
    output wire                      mem_rsp_valid_in [NUM_PORTS_IN],
    output wire [DATA_WIDTH-1:0]     mem_rsp_data_in [NUM_PORTS_IN],
    output wire [TAG_WIDTH_IN-1:0]   mem_rsp_tag_in [NUM_PORTS_IN],
    input wire                       mem_rsp_ready_in [NUM_PORTS_IN],

    // Output request
    output wire                      mem_req_valid_out [NUM_BANKS_OUT],
    output wire                      mem_req_rw_out [NUM_BANKS_OUT],
    output wire [DATA_SIZE-1:0]      mem_req_byteen_out [NUM_BANKS_OUT],
    output wire [ADDR_WIDTH_OUT-1:0] mem_req_addr_out [NUM_BANKS_OUT],
    output wire [DATA_WIDTH-1:0]     mem_req_data_out [NUM_BANKS_OUT],
    output wire [TAG_WIDTH_OUT-1:0]  mem_req_tag_out [NUM_BANKS_OUT],
    input wire                       mem_req_ready_out [NUM_BANKS_OUT],

    // Output response
    input wire                       mem_rsp_valid_out [NUM_BANKS_OUT],
    input wire [DATA_WIDTH-1:0]      mem_rsp_data_out [NUM_BANKS_OUT],
    input wire [TAG_WIDTH_OUT-1:0]   mem_rsp_tag_out [NUM_BANKS_OUT],
    output wire                      mem_rsp_ready_out [NUM_BANKS_OUT]
);
    // Address geometry
    localparam BANK_SEL_BITS      = `CLOG2(NUM_BANKS_OUT);
    localparam BANK_SEL_WIDTH     = `UP(BANK_SEL_BITS);
    localparam DST_ADDR_WIDTH     = ADDR_WIDTH_OUT + BANK_SEL_BITS; // output address expressed in the input address space
    localparam BANK_ADDR_WIDTH    = DST_ADDR_WIDTH - BANK_SEL_BITS; // bank-local part of the address

    // Tag geometry
    localparam NUM_PORTS_IN_BITS  = `CLOG2(NUM_PORTS_IN);
    localparam NUM_PORTS_IN_WIDTH = `UP(NUM_PORTS_IN_BITS);
    localparam TAG_BUFFER_ADDRW   = `CLOG2(TAG_BUFFER_SIZE);
    localparam NEEDED_TAG_WIDTH   = TAG_WIDTH_IN + NUM_PORTS_IN_BITS;
    // Read tags are replaced by a tag-buffer index when they do not fit downstream.
    localparam READ_TAG_WIDTH     = (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) ? TAG_BUFFER_ADDRW : TAG_WIDTH_IN;
    localparam READ_FULL_TAG_WIDTH= READ_TAG_WIDTH + NUM_PORTS_IN_BITS;
    localparam WRITE_TAG_WIDTH    = `MIN(TAG_WIDTH_IN, TAG_WIDTH_OUT);
    localparam DST_TAG_WIDTH      = `MAX(READ_FULL_TAG_WIDTH, WRITE_TAG_WIDTH);
    localparam ARB_TAG_WIDTH      = `MAX(READ_TAG_WIDTH, WRITE_TAG_WIDTH);

    // Payload widths: {rw, addr, byteen, data, tag}
    localparam ARB_DATAW          = 1 + BANK_ADDR_WIDTH + DATA_SIZE + DATA_WIDTH + ARB_TAG_WIDTH;
    localparam REQ_BUF_DATAW      = 1 + BANK_ADDR_WIDTH + DATA_SIZE + DATA_WIDTH + DST_TAG_WIDTH;
    // Response payload: {data, read tag}
    localparam RSP_XBAR_DATAW     = DATA_WIDTH + READ_TAG_WIDTH;

    `STATIC_ASSERT ((DST_ADDR_WIDTH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WIDTH, ADDR_WIDTH_IN))
    `STATIC_ASSERT ((TAG_WIDTH_OUT >= DST_TAG_WIDTH), ("invalid output tag width: current=%0d, expected=%0d", TAG_WIDTH_OUT, DST_TAG_WIDTH))

    // Banks selection

    wire [NUM_PORTS_IN-1:0][BANK_SEL_WIDTH-1:0]  req_bank_sel;
    wire [NUM_PORTS_IN-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;

    if (NUM_BANKS_OUT > 1) begin : g_port_sel
        for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
            // widen the input address to the destination address space
            wire [DST_ADDR_WIDTH-1:0] mem_req_addr_dst = DST_ADDR_WIDTH'(mem_req_addr_in[i]);
            if (INTERLEAVE) begin : g_interleave
                // consecutive addresses land on consecutive banks
                assign req_bank_sel[i]  = mem_req_addr_dst[BANK_SEL_BITS-1:0];
                assign req_bank_addr[i] = mem_req_addr_dst[BANK_SEL_BITS +: BANK_ADDR_WIDTH];
            end else begin : g_no_interleave
                // each bank owns one contiguous region of the address space
                assign req_bank_sel[i]  = mem_req_addr_dst[BANK_ADDR_WIDTH +: BANK_SEL_BITS];
                assign req_bank_addr[i] = mem_req_addr_dst[BANK_ADDR_WIDTH-1:0];
            end
        end
    end else begin : g_no_port_sel
        for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_i
            assign req_bank_sel[i]  = '0;
            // single bank: BANK_SEL_BITS is 0, so the bank address is the whole destination address
            assign req_bank_addr[i] = BANK_ADDR_WIDTH'(mem_req_addr_in[i]);
        end
    end

    // Tag handling logic

    wire [NUM_PORTS_IN-1:0] mem_rd_req_tag_in_ready;
    wire [NUM_PORTS_IN-1:0][READ_TAG_WIDTH-1:0] mem_rd_req_tag_in;
    wire [NUM_PORTS_IN-1:0][READ_TAG_WIDTH-1:0] mem_rd_rsp_tag_in;

    for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_tag_buf
        if (NEEDED_TAG_WIDTH > TAG_WIDTH_OUT) begin : g_enabled
            // The input tag does not fit downstream: store it and forward the buffer index.
            wire [TAG_BUFFER_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
            wire tbuf_full;
            VX_index_buffer #(
                .DATAW (TAG_WIDTH_IN),
                .SIZE  (TAG_BUFFER_SIZE)
            ) tag_buf (
                .clk        (clk),
                .reset      (reset),
                // allocate an entry when a read request is accepted
                .acquire_en (mem_req_valid_in[i] && ~mem_req_rw_in[i] && mem_req_ready_in[i]),
                .write_addr (tbuf_waddr),
                .write_data (mem_req_tag_in[i]),
                .read_data  (mem_rsp_tag_in[i]),
                .read_addr  (tbuf_raddr),
                // free the entry when the response is accepted by the requester
                .release_en (mem_rsp_valid_in[i] && mem_rsp_ready_in[i]),
                .full       (tbuf_full),
                `UNUSED_PIN (empty)
            );
            assign mem_rd_req_tag_in_ready[i] = ~tbuf_full;
            assign mem_rd_req_tag_in[i] = tbuf_waddr;
            assign tbuf_raddr = mem_rd_rsp_tag_in[i];
        end else begin : g_none
            // The input tag fits: pass it through unchanged in both directions.
            assign mem_rd_req_tag_in_ready[i] = 1;
            assign mem_rd_req_tag_in[i] = mem_req_tag_in[i];
            assign mem_rsp_tag_in[i] = mem_rd_rsp_tag_in[i];
        end
    end

    // Request ack

    // arb_ready_in is indexed [bank][port]; transpose it to [port][bank]
    wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN-1:0] arb_ready_in;
    wire [NUM_PORTS_IN-1:0][NUM_BANKS_OUT-1:0] arb_ready_in_w;

    VX_transpose #(
        .N (NUM_BANKS_OUT),
        .M (NUM_PORTS_IN)
    ) rdy_in_transpose (
        .data_in  (arb_ready_in),
        .data_out (arb_ready_in_w)
    );

    for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_ready_in
        // a port is ready when any bank arbiter accepts its request
        assign mem_req_ready_in[i] = | arb_ready_in_w[i];
    end

    // Request handling

    for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_requests

        wire [BANK_ADDR_WIDTH-1:0] arb_addr_out, buf_addr_out;
        wire [ARB_TAG_WIDTH-1:0] arb_tag_out;
        wire [DST_TAG_WIDTH-1:0] arb_tag_s_out, buf_tag_out;
        wire [NUM_PORTS_IN_WIDTH-1:0] arb_sel_out;
        wire [DATA_WIDTH-1:0] arb_data_out, buf_data_out;
        wire [DATA_SIZE-1:0] arb_byteen_out, buf_byteen_out;
        wire arb_valid_out, buf_valid_out;
        wire arb_ready_out, buf_ready_out;
        wire arb_rw_out, buf_rw_out;

        wire [NUM_PORTS_IN-1:0][ARB_DATAW-1:0] arb_data_in;
        wire [NUM_PORTS_IN-1:0] arb_valid_in;

        for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_valid_in
            // writes need no tag slot; reads wait until the tag path can take them
            wire tag_ready = mem_req_rw_in[j] || mem_rd_req_tag_in_ready[j];
            // only requests that target this bank compete for it
            assign arb_valid_in[j] = mem_req_valid_in[j] && tag_ready && (req_bank_sel[j] == i);
        end

        for (genvar j = 0; j < NUM_PORTS_IN; ++j) begin : g_data_in
            // writes carry the original tag, reads carry the (possibly remapped) read tag
            wire [ARB_TAG_WIDTH-1:0] tag_value = mem_req_rw_in[j] ? ARB_TAG_WIDTH'(mem_req_tag_in[j]) : ARB_TAG_WIDTH'(mem_rd_req_tag_in[j]);
            assign arb_data_in[j] = {mem_req_rw_in[j], req_bank_addr[j], mem_req_byteen_in[j], mem_req_data_in[j], tag_value};
        end

        VX_stream_arb #(
            .NUM_INPUTS (NUM_PORTS_IN),
            .NUM_OUTPUTS(1),
            .DATAW      (ARB_DATAW),
            .ARBITER    (ARBITER)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (arb_valid_in),
            .ready_in  (arb_ready_in[i]),
            .data_in   (arb_data_in),
            .data_out  ({arb_rw_out, arb_addr_out, arb_byteen_out, arb_data_out, arb_tag_out}),
            .valid_out (arb_valid_out),
            .ready_out (arb_ready_out),
            .sel_out   (arb_sel_out)
        );

        if (NUM_PORTS_IN > 1) begin : g_input_sel
            // append the granted port index below the tag for response routing
            assign arb_tag_s_out = DST_TAG_WIDTH'({arb_tag_out, arb_sel_out});
        end else begin : g_no_input_sel
            `UNUSED_VAR (arb_sel_out)
            assign arb_tag_s_out = DST_TAG_WIDTH'(arb_tag_out);
        end

        VX_elastic_buffer #(
            .DATAW   (REQ_BUF_DATAW),
            .SIZE    (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
            .OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)),
            .LUTRAM  (`TO_OUT_BUF_LUTRAM(REQ_OUT_BUF))
        ) req_buf (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (arb_valid_out),
            .ready_in  (arb_ready_out),
            .data_in   ({arb_rw_out, arb_addr_out, arb_byteen_out, arb_data_out, arb_tag_s_out}),
            .data_out  ({buf_rw_out, buf_addr_out, buf_byteen_out, buf_data_out, buf_tag_out}),
            .valid_out (buf_valid_out),
            .ready_out (buf_ready_out)
        );

        assign mem_req_valid_out[i]  = buf_valid_out;
        assign mem_req_rw_out[i]     = buf_rw_out;
        assign mem_req_addr_out[i]   = ADDR_WIDTH_OUT'(buf_addr_out); // bank-local address, forwarded unshifted
        assign mem_req_byteen_out[i] = buf_byteen_out;
        assign mem_req_data_out[i]   = buf_data_out;
        assign mem_req_tag_out[i]    = TAG_WIDTH_OUT'(buf_tag_out);
        assign buf_ready_out         = mem_req_ready_out[i];
    end

    // Response channel

    wire [NUM_BANKS_OUT-1:0] rsp_xbar_valid_in;
    wire [NUM_BANKS_OUT-1:0][RSP_XBAR_DATAW-1:0] rsp_xbar_data_in;
    wire [NUM_BANKS_OUT-1:0][NUM_PORTS_IN_WIDTH-1:0] rsp_xbar_sel_in;
    wire [NUM_BANKS_OUT-1:0] rsp_xbar_ready_in;

    for (genvar i = 0; i < NUM_BANKS_OUT; ++i) begin : g_rsp_xbar_data_in
        assign rsp_xbar_valid_in[i] = mem_rsp_valid_out[i];
        // strip the port index; what remains is the read tag
        assign rsp_xbar_data_in[i] = {mem_rsp_data_out[i], mem_rsp_tag_out[i][NUM_PORTS_IN_BITS +: READ_TAG_WIDTH]};
        if (NUM_PORTS_IN > 1) begin : g_input_sel
            // the low tag bits carry the originating input port
            assign rsp_xbar_sel_in[i] = mem_rsp_tag_out[i][0 +: NUM_PORTS_IN_BITS];
        end else begin : g_no_input_sel
            assign rsp_xbar_sel_in[i] = 0;
        end
        assign mem_rsp_ready_out[i] = rsp_xbar_ready_in[i];
    end

    wire [NUM_PORTS_IN-1:0] rsp_xbar_valid_out;
    wire [NUM_PORTS_IN-1:0][RSP_XBAR_DATAW-1:0] rsp_xbar_data_out;
    wire [NUM_PORTS_IN-1:0] rsp_xbar_ready_out;

    VX_stream_xbar #(
        .NUM_INPUTS (NUM_BANKS_OUT),
        .NUM_OUTPUTS(NUM_PORTS_IN),
        .DATAW      (RSP_XBAR_DATAW),
        .ARBITER    (ARBITER),
        .OUT_BUF    (RSP_OUT_BUF)
    ) rsp_xbar (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (rsp_xbar_valid_in),
        .data_in   (rsp_xbar_data_in),
        .ready_in  (rsp_xbar_ready_in),
        .sel_in    (rsp_xbar_sel_in),
        .data_out  (rsp_xbar_data_out),
        .valid_out (rsp_xbar_valid_out),
        .ready_out (rsp_xbar_ready_out),
        `UNUSED_PIN (collisions),
        `UNUSED_PIN (sel_out)
    );

    for (genvar i = 0; i < NUM_PORTS_IN; ++i) begin : g_rsp_xbar_data_out
        assign mem_rsp_valid_in[i] = rsp_xbar_valid_out[i];
        // the read tag feeds the tag logic above, which drives mem_rsp_tag_in
        assign {mem_rsp_data_in[i], mem_rd_rsp_tag_in[i]} = rsp_xbar_data_out[i];
        assign rsp_xbar_ready_out[i] = mem_rsp_ready_in[i];
    end

endmodule
`TRACING_ON

View file

@ -14,7 +14,7 @@
`include "VX_platform.vh"
`TRACING_OFF
module VX_mem_adapter #(
module VX_mem_data_adapter #(
parameter SRC_DATA_WIDTH = 1,
parameter SRC_ADDR_WIDTH = 1,
parameter DST_DATA_WIDTH = 1,

View file

@ -50,6 +50,7 @@ module VX_sp_ram #(
parameter OUT_REG = 0,
parameter LUTRAM = 0,
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, N: no-change, U: undefined
parameter RADDR_REG = 0, // read address registered hint
parameter RDW_ASSERT = 0,
parameter RESET_RAM = 0,
parameter INIT_ENABLE = 0,
@ -68,9 +69,10 @@ module VX_sp_ram #(
);
localparam WSELW = DATAW / WRENW;
`UNUSED_PARAM (LUTRAM)
`UNUSED_PARAM (RADDR_REG)
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter"))
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N" || RDW_MODE == "U"), ("invalid parameter"))
`UNUSED_PARAM (RDW_ASSERT)
`ifdef SYNTHESIS
@ -323,6 +325,7 @@ module VX_sp_ram #(
.WRENW (WRENW),
.DUAL_PORT (0),
.FORCE_BRAM (FORCE_BRAM),
.RADDR_REG (RADDR_REG),
.WRITE_FIRST(RDW_MODE == "W"),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),

View file

@ -13,6 +13,7 @@
namespace eval vortex {
variable info 0
variable debug 0
proc print_error {msg {do_exit 1}} {
@ -167,21 +168,19 @@ proc get_cell_pin {cell name} {
}
proc remove_cell_from_netlist {cell} {
variable debug
puts "INFO: Removing cell '$cell' from the netlist."
variable info
# Disconnect all pins of the cell
#foreach pin [get_pins -quiet -of_objects $cell] {
# foreach net [get_nets -quiet -of_objects $pin] {
# disconnect_net -net $net -objects $pin
# if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."}
# }
#}
foreach pin [get_pins -quiet -of_objects $cell] {
foreach net [get_nets -quiet -of_objects $pin] {
disconnect_net -net $net -objects $pin
if {$info} {puts "INFO: Disconnected net '$net' from pin '$pin'."}
}
}
# Remove the cell
remove_cell $cell
if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."}
if {$info} {puts "INFO: Cell '$cell' was removed successfully."}
}
proc replace_pin_source {pin source_pin} {
@ -250,6 +249,7 @@ proc find_pin_driver {input_pin {should_exist 1}} {
}
proc create_register_next {parent reg_cell} {
variable info
variable debug
set hier_sep [get_hierarchy_separator]
@ -341,7 +341,7 @@ proc create_register_next {parent reg_cell} {
# FDSE: O = I1 ? 1 : I0; where I0=D, I1=S
set lut_name [unique_cell_name "${parent}${hier_sep}raddr_next"]
set lut_cell [create_cell -reference LUT2 $lut_name]
puts "INFO: Created lut cell: '$lut_cell'"
if {$info} {puts "INFO: Created lut cell: '$lut_cell'"}
if {$register_type == "FDRE"} {
set_property INIT 4'b0010 $lut_cell
@ -389,6 +389,7 @@ proc create_register_next {parent reg_cell} {
}
proc getOrCreateVCCPin {parent} {
variable info
variable debug
set hier_sep [get_hierarchy_separator]
@ -397,7 +398,7 @@ proc getOrCreateVCCPin {parent} {
set vcc_cell [get_cells -quiet $cell_name]
if {[llength $vcc_cell] == 0} {
set vcc_cell [create_cell -reference VCC $cell_name]
puts "INFO: Created VCC cell: '$vcc_cell'"
if {$info} {puts "INFO: Created VCC cell: '$vcc_cell'"}
} elseif {[llength $vcc_cell] > 1} {
puts "ERROR: Multiple VCC cells found with name '$cell_name'."
exit -1
@ -416,6 +417,7 @@ proc getOrCreateVCCPin {parent} {
}
proc getOrCreateGNDPin {parent} {
variable info
variable debug
set hier_sep [get_hierarchy_separator]
@ -424,7 +426,7 @@ proc getOrCreateGNDPin {parent} {
set gnd_cell [get_cells -quiet $cell_name]
if {[llength $gnd_cell] == 0} {
set gnd_cell [create_cell -reference GND $cell_name]
puts "INFO: Created GND cell: '$gnd_cell'"
if {$info} {puts "INFO: Created GND cell: '$gnd_cell'"}
} elseif {[llength $gnd_cell] > 1} {
puts "ERROR: Multiple GND cells found with name '$cell_name'."
exit -1
@ -501,6 +503,7 @@ proc replace_net_source {net source_pin} {
}
proc resolve_async_bram {inst} {
variable info
variable debug
puts "INFO: Resolving asynchronous BRAM patch: '$inst'."
@ -575,27 +578,27 @@ proc resolve_async_bram {inst} {
# do we have a fully registered read address?
if {[llength $reg_next_pins] == [llength $raddr_w_nets]} {
puts "INFO: Fully registered read address detected."
if {$info} {puts "INFO: Fully registered read address detected."}
# Connect all reg_next_pins to all input pins attached to raddr_s_nets
set addr_width [llength $raddr_w_nets]
for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} {
set raddr_s_net [lindex $raddr_s_nets $addr_idx]
set reg_next_pin [lindex $reg_next_pins $addr_idx]
puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."
if {$info} {puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."}
# Connect reg_next_pin to all input pins attached to raddr_s_net
replace_net_source $raddr_s_net $reg_next_pin
}
# Connect reg_ce_src_pin to all input pins attached to read_s_net
puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins."
if {$info} {puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins."}
replace_net_source $read_s_net $reg_ce_src_pin
# Create Const<1>'s pin
set vcc_pin [getOrCreateVCCPin $inst]
# Connect vcc_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."
if {$info} {puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."}
replace_net_source $is_raddr_reg_net $vcc_pin
# Remove all async_ram cells
@ -609,7 +612,7 @@ proc resolve_async_bram {inst} {
set gnd_pin [getOrCreateGNDPin $inst]
# Connect gnd_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."
if {$info} {puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."}
replace_net_source $is_raddr_reg_net $gnd_pin
# Remove all sync_ram cells

View file

@ -78,12 +78,12 @@ public:
ramulator_memorysystem_->tick();
}
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg) {
if (!ramulator_frontend_->receive_external_requests(
is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read,
addr,
source_id,
[callback_ = std::move(callback), arg_ = std::move(arg)](Ramulator::Request& /*dram_req*/) {
[callback_ = std::move(response_cb), arg_ = std::move(arg)](Ramulator::Request& /*dram_req*/) {
callback_(arg_);
}
)) {
@ -91,7 +91,7 @@ public:
}
if (is_write) {
// Ramulator does not handle write responses, so we call the callback ourselves
callback(arg);
response_cb(arg);
}
return true;
}

View file

@ -26,7 +26,7 @@ public:
void tick();
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg);
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg);
private:
class Impl;

View file

@ -31,7 +31,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters
# Platform parameters
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif

View file

@ -441,7 +441,7 @@ private:
}
}
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen);
/*printf("%0ld: [sim] MEM Wr Req[%d]: addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
}
@ -466,7 +466,7 @@ private:
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr);
/*printf("%0ld: [sim] MEM Rd Req[%d]: addr=0x%lx, pending={", timestamp, b, byte_addr);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
}

View file

@ -24,6 +24,21 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
# Platform parameters
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
endif
endif
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
@ -33,12 +48,14 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/processor.cpp
TOP = rtlsim_shim
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
@ -49,7 +66,7 @@ VL_FLAGS += -DXLEN_$(XLEN)
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc Vortex --top-module Vortex
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
CXXFLAGS += $(CONFIGS)

View file

@ -13,7 +13,7 @@
#include "processor.h"
#include "VVortex.h"
#include "Vrtlsim_shim.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
@ -35,6 +35,8 @@
#include <dram_sim.h>
#include <util.h>
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
#endif
@ -100,7 +102,7 @@ public:
Verilated::assertOn(false);
// create RTL module instance
device_ = new VVortex();
device_ = new Vrtlsim_shim();
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
@ -226,13 +228,11 @@ private:
if (!dram_queue_[b].empty()) {
auto mem_req = dram_queue_[b].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
// mark completed request as ready
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
// was successfully sent to dram, remove from queue
dram_queue_[b].pop();
}
}
@ -269,39 +269,39 @@ private:
}
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// process memory read responses
// process memory responses
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
device_->mem_rsp_valid[b] = 0;
}
if (!device_->mem_rsp_valid[b]) {
if (!pending_mem_reqs_[b].empty()
&& (*pending_mem_reqs_[b].begin())->ready) {
if (device_->mem_rsp_valid[b] == 0) {
if (!pending_mem_reqs_[b].empty()) {
auto mem_rsp_it = pending_mem_reqs_[b].begin();
auto mem_rsp = *mem_rsp_it;
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->data[i]);
}
printf("\n");
*/
if (mem_rsp->ready) {
if (!mem_rsp->write) {
// return read responses
device_->mem_rsp_valid[b] = 1;
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[b]), mem_rsp->data.data(), MEM_BLOCK_SIZE);
memcpy(VDataCast<void*, PLATFORM_MEMORY_DATA_SIZE>::get(device_->mem_rsp_data[b]), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
device_->mem_rsp_tag[b] = mem_rsp->tag;
}
// delete the request
pending_mem_reqs_[b].erase(mem_rsp_it);
delete mem_rsp;
}
}
}
// process memory requests
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
uint64_t byte_addr = (device_->mem_req_addr[b] * MEM_BLOCK_SIZE);
uint64_t byte_addr = (device_->mem_req_addr[b] * PLATFORM_MEMORY_DATA_SIZE);
if (device_->mem_req_rw[b]) {
auto byteen = device_->mem_req_byteen[b];
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[b]);
auto data = VDataCast<uint8_t*, PLATFORM_MEMORY_DATA_SIZE>::get(device_->mem_req_data[b]);
// check address range
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
@ -314,31 +314,31 @@ private:
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
/*printf("%0ld: [sim] MEM Wr Req[%d]: addr=0x%0lx, tag=0x%0lx, byteen=0x", timestamp, b, byte_addr, device_->mem_req_tag[b]);
for (int i = (PLATFORM_MEMORY_DATA_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("\n");*/
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag[b];
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
mem_req->ready = false;
// send dram request
// enqueue dram request
dram_queue_[b].push(mem_req);
// add to pending list
pending_mem_reqs_[b].emplace_back(mem_req);
}
} else {
// process reads
@ -347,13 +347,19 @@ private:
mem_req->addr = byte_addr;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_[b].emplace_back(mem_req);
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
/*printf("%0ld: [sim] MEM Rd Req[%d]: addr=0x%0lx, tag=0x%0lx, data=0x", timestamp, b, byte_addr, device_->mem_req_tag[b]);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
}
printf("\n");*/
// send dram request
// enqueue dram request
dram_queue_[b].push(mem_req);
// add to pending list
pending_mem_reqs_[b].emplace_back(mem_req);
}
}
}
@ -372,8 +378,8 @@ private:
private:
typedef struct {
VVortex* device;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
Vrtlsim_shim* device;
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
uint64_t addr;
uint64_t tag;
bool write;
@ -390,7 +396,7 @@ private:
DramSim dram_sim_;
VVortex* device_;
Vrtlsim_shim* device_;
RAM* ram_;

196
sim/rtlsim/rtlsim_shim.sv Normal file
View file

@ -0,0 +1,196 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// rtlsim_shim: simulation top-level that wraps the Vortex core and exposes a
// multi-bank platform memory interface to the testbench.
//
// Data path (as wired below):
//   Vortex (`VX_MEM_PORTS ports, `VX_MEM_DATA_WIDTH bits each)
//     -> one VX_mem_data_adapter per port (converts to MEM_DATA_WIDTH-bit words)
//     -> a single VX_mem_bank_adapter (maps `VX_MEM_PORTS ports onto MEM_NUM_BANKS banks)
//     -> mem_req_* / mem_rsp_* module ports.
// The DCR write interface and the busy flag are connected straight through
// to the Vortex instance without modification.
//
// Parameters:
//   MEM_DATA_WIDTH - data width in bits of each external memory bank
//   MEM_ADDR_WIDTH - address width in bits of each external memory bank
//   MEM_NUM_BANKS  - number of external memory banks (size of every unpacked port array)
//   MEM_TAG_WIDTH  - width of the request/response tag seen by the testbench
//                    NOTE(review): presumably 64 so the tag fits a 64-bit integer
//                    on the C++ side - confirm against the simulator driver
module rtlsim_shim import VX_gpu_pkg::*; #(
parameter MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH,
parameter MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS,
parameter MEM_TAG_WIDTH = 64
) (
`SCOPE_IO_DECL
// Clock
input wire clk,
input wire reset,
// Memory request
// (one element per bank; byteen carries one enable bit per data byte)
output wire mem_req_valid [MEM_NUM_BANKS],
output wire mem_req_rw [MEM_NUM_BANKS],
output wire [(MEM_DATA_WIDTH/8)-1:0] mem_req_byteen [MEM_NUM_BANKS],
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr [MEM_NUM_BANKS],
output wire [MEM_DATA_WIDTH-1:0] mem_req_data [MEM_NUM_BANKS],
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag [MEM_NUM_BANKS],
input wire mem_req_ready [MEM_NUM_BANKS],
// Memory response
// (one element per bank)
input wire mem_rsp_valid [MEM_NUM_BANKS],
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data [MEM_NUM_BANKS],
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag [MEM_NUM_BANKS],
output wire mem_rsp_ready [MEM_NUM_BANKS],
// DCR write request
input wire dcr_wr_valid,
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
// Status
output wire busy
);
// Log2 of the platform (destination) and Vortex (source) data widths, and
// their difference. SUB_LDATAW > 0 means the platform word is wider than
// the Vortex word; SUB_LDATAW < 0 means it is narrower.
localparam DST_LDATAW = `CLOG2(MEM_DATA_WIDTH);
localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH);
localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW;
// Widths on the adapted ("_a") side of the data adapter:
//  - the tag grows by SUB_LDATAW bits only when the platform word is wider
//    (clamped at 0 otherwise);
//    NOTE(review): presumably the extra bits carry the sub-word offset so the
//    response can be routed back - confirm in VX_mem_data_adapter
//  - the address shrinks by SUB_LDATAW bits (and therefore grows when
//    SUB_LDATAW is negative, since this term is not clamped).
localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0);
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW;
// Native Vortex memory interface: one request/response channel per Vortex port.
wire vx_mem_req_valid [`VX_MEM_PORTS];
wire vx_mem_req_rw [`VX_MEM_PORTS];
wire [`VX_MEM_BYTEEN_WIDTH-1:0] vx_mem_req_byteen [`VX_MEM_PORTS];
wire [`VX_MEM_ADDR_WIDTH-1:0] vx_mem_req_addr [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_req_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_req_tag [`VX_MEM_PORTS];
wire vx_mem_req_ready [`VX_MEM_PORTS];
wire vx_mem_rsp_valid [`VX_MEM_PORTS];
wire [`VX_MEM_DATA_WIDTH-1:0] vx_mem_rsp_data [`VX_MEM_PORTS];
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag [`VX_MEM_PORTS];
wire vx_mem_rsp_ready [`VX_MEM_PORTS];
// Scope (debug) plumbing; the expansion of these macros is defined outside this file.
`SCOPE_IO_SWITCH (1);
// Vortex core instance; memory traffic leaves through the vx_mem_* wires above.
Vortex vortex (
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (reset),
.mem_req_valid (vx_mem_req_valid),
.mem_req_rw (vx_mem_req_rw),
.mem_req_byteen (vx_mem_req_byteen),
.mem_req_addr (vx_mem_req_addr),
.mem_req_data (vx_mem_req_data),
.mem_req_tag (vx_mem_req_tag),
.mem_req_ready (vx_mem_req_ready),
.mem_rsp_valid (vx_mem_rsp_valid),
.mem_rsp_data (vx_mem_rsp_data),
.mem_rsp_tag (vx_mem_rsp_tag),
.mem_rsp_ready (vx_mem_rsp_ready),
.dcr_wr_valid (dcr_wr_valid),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data),
.busy (busy)
);
// Width-adapted ("_a") memory interface: still one channel per Vortex port,
// but using the platform data width and the adjusted address/tag widths.
wire mem_req_valid_a [`VX_MEM_PORTS];
wire mem_req_rw_a [`VX_MEM_PORTS];
wire [(MEM_DATA_WIDTH/8)-1:0] mem_req_byteen_a [`VX_MEM_PORTS];
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a [`VX_MEM_PORTS];
wire [MEM_DATA_WIDTH-1:0] mem_req_data_a [`VX_MEM_PORTS];
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a [`VX_MEM_PORTS];
wire mem_req_ready_a [`VX_MEM_PORTS];
wire mem_rsp_valid_a [`VX_MEM_PORTS];
wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_a [`VX_MEM_PORTS];
wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a [`VX_MEM_PORTS];
wire mem_rsp_ready_a [`VX_MEM_PORTS];
// Adjust memory data width to match the platform memory interface (MEM_DATA_WIDTH);
// one adapter is instantiated per Vortex memory port, with no output buffering.
for (genvar i = 0; i < `VX_MEM_PORTS; i++) begin : g_mem_adapter
VX_mem_data_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (MEM_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_data_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (vx_mem_req_valid[i]),
.mem_req_addr_in (vx_mem_req_addr[i]),
.mem_req_rw_in (vx_mem_req_rw[i]),
.mem_req_byteen_in (vx_mem_req_byteen[i]),
.mem_req_data_in (vx_mem_req_data[i]),
.mem_req_tag_in (vx_mem_req_tag[i]),
.mem_req_ready_in (vx_mem_req_ready[i]),
.mem_rsp_valid_in (vx_mem_rsp_valid[i]),
.mem_rsp_data_in (vx_mem_rsp_data[i]),
.mem_rsp_tag_in (vx_mem_rsp_tag[i]),
.mem_rsp_ready_in (vx_mem_rsp_ready[i]),
.mem_req_valid_out (mem_req_valid_a[i]),
.mem_req_addr_out (mem_req_addr_a[i]),
.mem_req_rw_out (mem_req_rw_a[i]),
.mem_req_byteen_out (mem_req_byteen_a[i]),
.mem_req_data_out (mem_req_data_a[i]),
.mem_req_tag_out (mem_req_tag_a[i]),
.mem_req_ready_out (mem_req_ready_a[i]),
.mem_rsp_valid_out (mem_rsp_valid_a[i]),
.mem_rsp_data_out (mem_rsp_data_a[i]),
.mem_rsp_tag_out (mem_rsp_tag_a[i]),
.mem_rsp_ready_out (mem_rsp_ready_a[i])
);
end
// Map the `VX_MEM_PORTS width-adapted ports onto the MEM_NUM_BANKS external
// banks, converting address and tag widths to the module-port widths.
// Output buffers use setting 2 only when there is more than one port
// (requests) or more than one port or bank (responses); otherwise 0.
// NOTE(review): INTERLEAVE=0 presumably disables address interleaving across
// banks - confirm against VX_mem_bank_adapter.
VX_mem_bank_adapter #(
.DATA_WIDTH (MEM_DATA_WIDTH),
.ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH),
.ADDR_WIDTH_OUT (MEM_ADDR_WIDTH),
.TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH),
.TAG_WIDTH_OUT (MEM_TAG_WIDTH),
.NUM_PORTS_IN (`VX_MEM_PORTS),
.NUM_BANKS_OUT (MEM_NUM_BANKS),
.INTERLEAVE (0),
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || MEM_NUM_BANKS > 1) ? 2 : 0)
) mem_bank_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (mem_req_valid_a),
.mem_req_rw_in (mem_req_rw_a),
.mem_req_byteen_in (mem_req_byteen_a),
.mem_req_addr_in (mem_req_addr_a),
.mem_req_data_in (mem_req_data_a),
.mem_req_tag_in (mem_req_tag_a),
.mem_req_ready_in (mem_req_ready_a),
.mem_rsp_valid_in (mem_rsp_valid_a),
.mem_rsp_data_in (mem_rsp_data_a),
.mem_rsp_tag_in (mem_rsp_tag_a),
.mem_rsp_ready_in (mem_rsp_ready_a),
.mem_req_valid_out (mem_req_valid),
.mem_req_rw_out (mem_req_rw),
.mem_req_byteen_out (mem_req_byteen),
.mem_req_addr_out (mem_req_addr),
.mem_req_data_out (mem_req_data),
.mem_req_tag_out (mem_req_tag),
.mem_req_ready_out (mem_req_ready),
.mem_rsp_valid_out (mem_rsp_valid),
.mem_rsp_data_out (mem_rsp_data),
.mem_rsp_tag_out (mem_rsp_tag),
.mem_rsp_ready_out (mem_rsp_ready)
);
endmodule

View file

@ -86,7 +86,7 @@ public:
if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp: bank=" << rsp_args->bank_id << ", " << mem_rsp);
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp[" << rsp_args->bank_id << "]: " << mem_rsp);
}
delete rsp_args;
},
@ -99,7 +99,7 @@ public:
continue;
}
DT(3, simobject_->name() << "-mem-req: bank=" << i << ", " << mem_req);
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);
mem_xbar_->ReqOut.at(i).pop();
counter++;

View file

@ -31,7 +31,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters
# Platform parameters
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif
@ -45,9 +45,6 @@ endif
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_OFFSET=0
endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)

View file

@ -227,7 +227,7 @@ public:
return -1;
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->write(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write[%d]: addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((const uint8_t*)data)[i]);
}
@ -242,7 +242,7 @@ public:
return -1;
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->read(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read[%d]: addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((uint8_t*)data)[i]);
}
@ -491,7 +491,7 @@ private:
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag);
/*printf("%0ld: [sim] axi-mem-read[%d]: addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
}
@ -534,7 +534,7 @@ private:
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag);
/*printf("%0ld: [sim] axi-mem-write[%d]: addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", m_axi_states_[b].write_req_data[i]);
}