xrtsim addressing fix

This commit is contained in:
Blaise Tine 2024-09-23 08:56:57 -07:00
parent 818522f7e4
commit 9a6dbdf1a9
9 changed files with 68 additions and 66 deletions

View file

@ -84,7 +84,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
);
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH);
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
wire mem_req_valid;
@ -182,13 +182,13 @@ module Vortex_axi import VX_gpu_pkg::*; #(
);
VX_axi_adapter #(
.DATA_WIDTH (AXI_DATA_WIDTH),
.ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.TAG_WIDTH (AXI_TID_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.BANK_INTERLEAVE (0),
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
.DATA_WIDTH (AXI_DATA_WIDTH),
.ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH),
.ADDR_WIDTH_OUT (AXI_ADDR_WIDTH),
.TAG_WIDTH (AXI_TID_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.BANK_INTERLEAVE(0),
.RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),

View file

@ -602,13 +602,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
VX_avs_adapter #(
.DATA_WIDTH (LMEM_DATA_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.ADDR_WIDTH_IN (LMEM_ADDR_WIDTH),
.ADDR_WIDTH_OUT($bits(t_local_mem_addr)),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.AVS_ADDR_WIDTH($bits(t_local_mem_addr)),
.BANK_INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
.BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE),
.REQ_OUT_BUF (2),
.RSP_OUT_BUF (0)
) avs_adapter (

View file

@ -15,8 +15,7 @@
module VX_afu_ctrl #(
parameter S_AXI_ADDR_WIDTH = 8,
parameter S_AXI_DATA_WIDTH = 32,
parameter M_AXI_ADDR_WIDTH = 25
parameter S_AXI_DATA_WIDTH = 32
) (
// axi4 lite slave signals
input wire clk,
@ -135,7 +134,7 @@ module VX_afu_ctrl #(
// device caps
wire [63:0] dev_caps = {8'b0,
5'(M_AXI_ADDR_WIDTH-16),
5'(`PLATFORM_MEMORY_ADDR_WIDTH-16),
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
16'(`NUM_CORES * `NUM_CLUSTERS),

View file

@ -17,8 +17,8 @@ module VX_afu_wrap #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = 32,
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
parameter C_M_AXI_MEM_NUM_BANKS = 2
) (
// System signals
@ -52,6 +52,11 @@ module VX_afu_wrap #(
output wire interrupt
);
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
`else
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
`endif
localparam STATE_IDLE = 0;
localparam STATE_RUN = 1;
@ -187,8 +192,7 @@ module VX_afu_wrap #(
VX_afu_ctrl #(
.S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH)
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH)
) afu_ctrl (
.clk (clk),
.reset (reset),
@ -228,19 +232,19 @@ module VX_afu_wrap #(
.dcr_wr_data (dcr_wr_data)
);
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
end
`SCOPE_IO_SWITCH (2)
Vortex_axi #(
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH),
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) vortex_axi (

View file

@ -18,11 +18,10 @@ module vortex_afu #(
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
`ifdef SYNTHESIS
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
parameter C_M_AXI_MEM_NUM_BANKS = 1
`else
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
`endif
) (

View file

@ -16,13 +16,13 @@
`TRACING_OFF
module VX_avs_adapter #(
parameter DATA_WIDTH = 1,
parameter ADDR_WIDTH = 1,
parameter ADDR_WIDTH_IN = 1,
parameter ADDR_WIDTH_OUT= 32,
parameter BURST_WIDTH = 1,
parameter NUM_BANKS = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter BANK_INTERLEAVE= 0,
parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS),
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
) (
@ -33,7 +33,7 @@ module VX_avs_adapter #(
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
@ -47,7 +47,7 @@ module VX_avs_adapter #(
// AVS bus
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
@ -58,30 +58,34 @@ module VX_avs_adapter #(
localparam DATA_SIZE = DATA_WIDTH/8;
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
// Width of a full request address expressed in the input (mem_req_addr)
// space: the per-bank output address width (ADDR_WIDTH_OUT) extended with
// the bank-select bits (BANK_SEL_BITS = CLOG2(NUM_BANKS)).
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
`STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter"))
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
// Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [NUM_BANKS-1:0] bank_req_ready;
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
assign req_bank_off = mem_req_addr_out;
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
@ -151,7 +155,7 @@ module VX_avs_adapter #(
assign avs_read[i] = valid_out && ~rw_out;
assign avs_write[i] = valid_out && rw_out;
assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out);
assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out);
assign avs_byteenable[i] = byteen_out;
assign avs_writedata[i] = data_out;
assign avs_burstcount[i] = BURST_WIDTH'(1);

View file

@ -16,10 +16,10 @@
`TRACING_OFF
module VX_axi_adapter #(
parameter DATA_WIDTH = 512,
parameter ADDR_WIDTH = 32,
parameter ADDR_WIDTH_IN = 1,
parameter ADDR_WIDTH_OUT = 32,
parameter TAG_WIDTH = 8,
parameter NUM_BANKS = 1,
parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
parameter BANK_INTERLEAVE= 0,
parameter RSP_OUT_BUF = 0
) (
@ -30,7 +30,7 @@ module VX_axi_adapter #(
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [ADDR_WIDTH_IN-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
@ -44,7 +44,7 @@ module VX_axi_adapter #(
// AXI write request address channel
output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
output wire [7:0] m_axi_awlen [NUM_BANKS],
output wire [2:0] m_axi_awsize [NUM_BANKS],
@ -71,7 +71,7 @@ module VX_axi_adapter #(
// AXI read address channel
output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS],
@ -93,25 +93,27 @@ module VX_axi_adapter #(
localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8);
// Width of a full request address expressed in the input (mem_req_addr)
// space: the per-bank output address width (ADDR_WIDTH_OUT) plus the
// bank-select bits, minus the CLOG2(DATA_WIDTH/8) low-order bits that this
// adapter adds by left-shifting req_bank_off when it drives
// m_axi_awaddr / m_axi_araddr.
localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space
localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS;
`STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH))
`STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN))
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr);
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
assign req_bank_off = mem_req_addr_out;
end
wire mem_req_fire = mem_req_valid && mem_req_ready;
@ -148,7 +150,7 @@ module VX_axi_adapter #(
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_awid[i] = mem_req_tag;
assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(DATA_SIZE);
@ -180,7 +182,7 @@ module VX_axi_adapter #(
// AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_arid[i] = mem_req_tag;
assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(DATA_SIZE);

View file

@ -17,8 +17,8 @@ module vortex_afu_shim #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH,
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
) (
// System signals
@ -54,8 +54,8 @@ module vortex_afu_shim #(
.C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) afu_wrap (
.clk (ap_clk),

View file

@ -61,12 +61,6 @@
#define CPU_GPU_LATENCY 200
#if PLATFORM_MEMORY_ADDR_WIDTH > 32
typedef QData Vl_m_addr_t;
#else
typedef IData Vl_m_addr_t;
#endif
#if PLATFORM_MEMORY_DATA_WIDTH > 64
typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t;
#else
@ -482,7 +476,7 @@ private:
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
auto mem_req = new mem_req_t();
mem_req->tag = *m_axi_mem_[i].arid;
mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE;
mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr);
ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false;
mem_req->ready = false;
@ -511,7 +505,7 @@ private:
auto byteen = *m_axi_mem_[i].wstrb;
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE;
auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr;
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) {
@ -562,7 +556,7 @@ private:
typedef struct {
CData* awvalid;
CData* awready;
Vl_m_addr_t* awaddr;
QData* awaddr;
IData* awid;
CData* awlen;
CData* wvalid;
@ -572,7 +566,7 @@ private:
CData* wlast;
CData* arvalid;
CData* arready;
Vl_m_addr_t* araddr;
QData* araddr;
IData* arid;
CData* arlen;
CData* rvalid;