opaesim and xrtsim multi-bank memory support

This commit is contained in:
Blaise Tine 2024-09-22 03:54:40 -07:00
parent 00feb8b424
commit b8199decf4
27 changed files with 488 additions and 464 deletions

View file

@ -273,10 +273,11 @@ config2()
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
# test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
# test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress
echo "configuration-2 tests done!"
}

View file

@ -15,7 +15,7 @@
module Vortex_axi import VX_gpu_pkg::*; #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
parameter AXI_NUM_BANKS = 1
)(
@ -82,11 +82,10 @@ module Vortex_axi import VX_gpu_pkg::*; #(
// Status
output wire busy
);
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH);
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
wire mem_req_valid;
wire mem_req_rw;
@ -101,94 +100,6 @@ module Vortex_axi import VX_gpu_pkg::*; #(
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
end
VX_axi_adapter #(
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awready (m_axi_awready),
.m_axi_awaddr (m_axi_awaddr_unqual),
.m_axi_awid (m_axi_awid_unqual),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_awlock (m_axi_awlock),
.m_axi_awcache (m_axi_awcache),
.m_axi_awprot (m_axi_awprot),
.m_axi_awqos (m_axi_awqos),
.m_axi_awregion (m_axi_awregion),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wready (m_axi_wready),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wlast (m_axi_wlast),
.m_axi_bvalid (m_axi_bvalid),
.m_axi_bready (m_axi_bready),
.m_axi_bid (m_axi_bid_unqual),
.m_axi_bresp (m_axi_bresp),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arready (m_axi_arready),
.m_axi_araddr (m_axi_araddr_unqual),
.m_axi_arid (m_axi_arid_unqual),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arlock (m_axi_arlock),
.m_axi_arcache (m_axi_arcache),
.m_axi_arprot (m_axi_arprot),
.m_axi_arqos (m_axi_arqos),
.m_axi_arregion (m_axi_arregion),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rready (m_axi_rready),
.m_axi_rdata (m_axi_rdata),
.m_axi_rlast (m_axi_rlast) ,
.m_axi_rid (m_axi_rid_unqual),
.m_axi_rresp (m_axi_rresp)
);
`SCOPE_IO_SWITCH (1)
Vortex vortex (
@ -217,4 +128,127 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.busy (busy)
);
wire mem_req_valid_a;
wire mem_req_rw_a;
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
wire [AXI_TID_WIDTH-1:0] mem_req_tag_a;
wire mem_req_ready_a;
wire mem_rsp_valid_a;
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a;
wire mem_rsp_ready_a;
VX_mem_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (AXI_TID_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (mem_req_valid),
.mem_req_addr_in (mem_req_addr),
.mem_req_rw_in (mem_req_rw),
.mem_req_byteen_in (mem_req_byteen),
.mem_req_data_in (mem_req_data),
.mem_req_tag_in (mem_req_tag),
.mem_req_ready_in (mem_req_ready),
.mem_rsp_valid_in (mem_rsp_valid),
.mem_rsp_data_in (mem_rsp_data),
.mem_rsp_tag_in (mem_rsp_tag),
.mem_rsp_ready_in (mem_rsp_ready),
.mem_req_valid_out (mem_req_valid_a),
.mem_req_addr_out (mem_req_addr_a),
.mem_req_rw_out (mem_req_rw_a),
.mem_req_byteen_out (mem_req_byteen_a),
.mem_req_data_out (mem_req_data_a),
.mem_req_tag_out (mem_req_tag_a),
.mem_req_ready_out (mem_req_ready_a),
.mem_rsp_valid_out (mem_rsp_valid_a),
.mem_rsp_data_out (mem_rsp_data_a),
.mem_rsp_tag_out (mem_rsp_tag_a),
.mem_rsp_ready_out (mem_rsp_ready_a)
);
VX_axi_adapter #(
.DATA_WIDTH (AXI_DATA_WIDTH),
.ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.TAG_WIDTH (AXI_TID_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.BANK_INTERLEAVE (0),
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid_a),
.mem_req_rw (mem_req_rw_a),
.mem_req_byteen (mem_req_byteen_a),
.mem_req_addr (mem_req_addr_a),
.mem_req_data (mem_req_data_a),
.mem_req_tag (mem_req_tag_a),
.mem_req_ready (mem_req_ready_a),
.mem_rsp_valid (mem_rsp_valid_a),
.mem_rsp_data (mem_rsp_data_a),
.mem_rsp_tag (mem_rsp_tag_a),
.mem_rsp_ready (mem_rsp_ready_a),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awready (m_axi_awready),
.m_axi_awaddr (m_axi_awaddr),
.m_axi_awid (m_axi_awid),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_awlock (m_axi_awlock),
.m_axi_awcache (m_axi_awcache),
.m_axi_awprot (m_axi_awprot),
.m_axi_awqos (m_axi_awqos),
.m_axi_awregion (m_axi_awregion),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wready (m_axi_wready),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wlast (m_axi_wlast),
.m_axi_bvalid (m_axi_bvalid),
.m_axi_bready (m_axi_bready),
.m_axi_bid (m_axi_bid),
.m_axi_bresp (m_axi_bresp),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arready (m_axi_arready),
.m_axi_araddr (m_axi_araddr),
.m_axi_arid (m_axi_arid),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arlock (m_axi_arlock),
.m_axi_arcache (m_axi_arcache),
.m_axi_arprot (m_axi_arprot),
.m_axi_arqos (m_axi_arqos),
.m_axi_arregion (m_axi_arregion),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rready (m_axi_rready),
.m_axi_rdata (m_axi_rdata),
.m_axi_rlast (m_axi_rlast),
.m_axi_rid (m_axi_rid),
.m_axi_rresp (m_axi_rresp)
);
endmodule

View file

@ -30,7 +30,17 @@
//`include "platform_afu_top_config.vh"
`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
`endif
package local_mem_cfg_pkg;
@ -57,5 +67,3 @@ package local_mem_cfg_pkg;
typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask;
endpackage // local_mem_cfg_pkg
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY

View file

@ -42,7 +42,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
);
localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data);
localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8;
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH));
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
@ -96,9 +96,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
localparam STATE_DCR_WRITE = 4;
localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1);
localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + `CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8);
wire [127:0] afu_id = `AFU_ACCEL_UUID;
wire [63:0] dev_caps = {16'b0,
wire [63:0] dev_caps = {8'b0,
5'(BANK_BYTE_ADDR_WIDTH-16),
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
16'(`NUM_CORES * `NUM_CLUSTERS),
8'(`NUM_WARPS),
@ -601,6 +605,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.AVS_ADDR_WIDTH($bits(t_local_mem_addr)),
.BANK_INTERLEAVE (1),
.REQ_OUT_BUF (2),
.RSP_OUT_BUF (0)
) avs_adapter (

View file

@ -14,21 +14,21 @@
`include "vortex_afu.vh"
module VX_afu_ctrl #(
parameter AXI_ADDR_WIDTH = 8,
parameter AXI_DATA_WIDTH = 32,
parameter AXI_NUM_BANKS = 1
parameter S_AXI_ADDR_WIDTH = 8,
parameter S_AXI_DATA_WIDTH = 32,
parameter M_AXI_ADDR_WIDTH = 25
) (
// axi4 lite slave signals
input wire clk,
input wire reset,
input wire s_axi_awvalid,
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
output wire s_axi_awready,
input wire s_axi_wvalid,
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata,
input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb,
output wire s_axi_wready,
output wire s_axi_bvalid,
@ -36,11 +36,11 @@ module VX_afu_ctrl #(
input wire s_axi_bready,
input wire s_axi_arvalid,
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr,
output wire s_axi_arready,
output wire s_axi_rvalid,
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata,
output wire [1:0] s_axi_rresp,
input wire s_axi_rready,
@ -56,8 +56,6 @@ module VX_afu_ctrl #(
output wire scope_bus_out,
`endif
output wire [63:0] mem_base [AXI_NUM_BANKS],
output wire dcr_wr_valid,
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
@ -125,10 +123,6 @@ module VX_afu_ctrl #(
//ADDR_SCP_CTRL = 8'h3C,
`endif
ADDR_MEM_0 = 8'h40,
ADDR_MEM_1 = 8'h44,
//ADDR_MEM_CTRL = 8'h48,
ADDR_BITS = 8;
localparam
@ -144,7 +138,9 @@ module VX_afu_ctrl #(
RSTATE_WIDTH = 2;
// device caps
wire [63:0] dev_caps = {16'b0,
wire [63:0] dev_caps = {8'b0,
5'(M_AXI_ADDR_WIDTH-16),
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
16'(`NUM_CORES * `NUM_CLUSTERS),
8'(`NUM_WARPS),
@ -174,7 +170,6 @@ module VX_afu_ctrl #(
reg gie_r;
reg [1:0] ier_r;
reg [1:0] isr_r;
reg [63:0] mem_r [AXI_NUM_BANKS];
reg [31:0] dcra_r;
reg [31:0] dcrv_r;
reg dcr_wr_valid_r;
@ -311,10 +306,6 @@ module VX_afu_ctrl #(
dcra_r <= '0;
dcrv_r <= '0;
dcr_wr_valid_r <= 0;
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
mem_r[i] <= '0;
end
end else begin
dcr_wr_valid_r <= 0;
ap_reset_r <= 0;
@ -353,16 +344,7 @@ module VX_afu_ctrl #(
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
dcr_wr_valid_r <= 1;
end
default: begin
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
end
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
end
end
end
default:;
endcase
if (ier_r[0] & ap_done)
@ -453,8 +435,6 @@ module VX_afu_ctrl #(
assign ap_start = ap_start_r;
assign interrupt = gie_r & (| isr_r);
assign mem_base = mem_r;
assign dcr_wr_valid = dcr_wr_valid_r;
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);

View file

@ -16,16 +16,17 @@
module VX_afu_wrap #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
parameter C_M_AXI_MEM_ID_WIDTH = 32,
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
parameter C_M_AXI_MEM_NUM_BANKS = 2
) (
// System signals
input wire clk,
input wire reset,
// AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid,
@ -48,7 +49,6 @@ module VX_afu_wrap #(
output wire interrupt
);
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
localparam STATE_IDLE = 0;
localparam STATE_RUN = 1;
@ -80,7 +80,7 @@ module VX_afu_wrap #(
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
// convert memory interface to array
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
reg [15:0] vx_pending_writes;
@ -88,8 +88,6 @@ module VX_afu_wrap #(
reg vx_reset = 1; // asserted at initialization
wire vx_busy;
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
wire dcr_wr_valid;
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
@ -181,9 +179,9 @@ module VX_afu_wrap #(
end
VX_afu_ctrl #(
.AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
.S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH)
) afu_ctrl (
.clk (clk),
.reset (reset),
@ -218,26 +216,24 @@ module VX_afu_wrap #(
.scope_bus_out (scope_bus_in),
`endif
.mem_base (mem_base),
.dcr_wr_valid (dcr_wr_valid),
.dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data)
);
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
end
`SCOPE_IO_SWITCH (2)
Vortex_axi #(
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH),
.AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) vortex_axi (

View file

@ -16,16 +16,17 @@
module vortex_afu #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH,
parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
) (
// System signals
input wire ap_clk,
input wire ap_rst_n,
// AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid,
@ -54,12 +55,13 @@ module vortex_afu #(
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH)
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) afu_wrap (
.clk (ap_clk),
.reset (~ap_rst_n),
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
.s_axi_ctrl_awready (s_axi_ctrl_awready),

View file

@ -14,20 +14,24 @@
`ifndef VORTEX_AFU_VH
`define VORTEX_AFU_VH
`ifndef M_AXI_MEM_NUM_BANKS
`define M_AXI_MEM_NUM_BANKS 1
`ifndef PLATFORM_MEMORY_BANKS
`define PLATFORM_MEMORY_BANKS 2
`endif
`ifndef M_AXI_MEM_ADDR_WIDTH
`define M_AXI_MEM_ADDR_WIDTH 34
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
`define PLATFORM_MEMORY_ADDR_WIDTH 25
`endif
`ifndef M_AXI_MEM_DATA_WIDTH
`define M_AXI_MEM_DATA_WIDTH 512
`ifndef PLATFORM_MEMORY_DATA_WIDTH
`define PLATFORM_MEMORY_DATA_WIDTH 512
`endif
`ifndef M_AXI_MEM_ID_WIDTH
`define M_AXI_MEM_ID_WIDTH 32
`ifndef PLATFORM_MEMORY_OFFSET
`define PLATFORM_MEMORY_OFFSET 0
`endif
`ifndef PLATFORM_MEMORY_ID_WIDTH
`define PLATFORM_MEMORY_ID_WIDTH 32
`endif
`define GEN_AXI_MEM(i) \

View file

@ -21,6 +21,8 @@ module VX_avs_adapter #(
parameter NUM_BANKS = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter BANK_INTERLEAVE= 0,
parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS),
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
) (
@ -45,7 +47,7 @@ module VX_avs_adapter #(
// AVS bus
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
@ -53,28 +55,35 @@ module VX_avs_adapter #(
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
input wire avs_readdatavalid [NUM_BANKS]
);
localparam DATA_SIZE = DATA_WIDTH/8;
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
localparam DATA_SIZE = DATA_WIDTH/8;
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
`STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter"))
// Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [BANK_ADDRW-1:0] req_bank_sel;
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [BANK_OFFSETW-1:0] req_bank_off;
wire [NUM_BANKS-1:0] bank_req_ready;
if (NUM_BANKS > 1) begin : g_bank_sel
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
end
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
end
@ -142,7 +151,7 @@ module VX_avs_adapter #(
assign avs_read[i] = valid_out && ~rw_out;
assign avs_write[i] = valid_out && rw_out;
assign avs_address[i] = ADDR_WIDTH'(addr_out);
assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out);
assign avs_byteenable[i] = byteen_out;
assign avs_writedata[i] = data_out;
assign avs_burstcount[i] = BURST_WIDTH'(1);

View file

@ -19,7 +19,8 @@ module VX_axi_adapter #(
parameter ADDR_WIDTH = 32,
parameter TAG_WIDTH = 8,
parameter NUM_BANKS = 1,
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
parameter BANK_INTERLEAVE= 0,
parameter RSP_OUT_BUF = 0
) (
input wire clk,
@ -29,7 +30,7 @@ module VX_axi_adapter #(
input wire mem_req_valid,
input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr,
input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
@ -43,7 +44,7 @@ module VX_axi_adapter #(
// AXI write request address channel
output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
output wire [7:0] m_axi_awlen [NUM_BANKS],
output wire [2:0] m_axi_awsize [NUM_BANKS],
@ -70,7 +71,7 @@ module VX_axi_adapter #(
// AXI read address channel
output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS],
@ -89,15 +90,28 @@ module VX_axi_adapter #(
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
input wire [1:0] m_axi_rresp [NUM_BANKS]
);
localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8);
wire [BANK_ADDRW-1:0] req_bank_sel;
if (NUM_BANKS > 1) begin : g_req_bank_sel
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin : g_req_bank_sel_0
// AXI_ADDR_WIDTH must cover the per-bank offset plus the CLOG2(DATA_WIDTH/8) low-order bits (DST_ADDR_WDITH).
// The message reports an address-width mismatch; it previously said "tag width".
`STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid address width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH))
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [BANK_OFFSETW-1:0] req_bank_off;
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
end
wire mem_req_fire = mem_req_valid && mem_req_ready;
@ -134,10 +148,10 @@ module VX_axi_adapter #(
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
// req_bank_off counts DATA_WIDTH-sized words, while AXI AWADDR is a byte address:
// scale by DATA_SIZE (= CLOG2(DATA_WIDTH/8)), as the address-width assert on DST_ADDR_WDITH expects.
assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off) << DATA_SIZE;
assign m_axi_awid[i] = mem_req_tag;
assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(AXSIZE);
assign m_axi_awsize[i] = 3'(DATA_SIZE);
assign m_axi_awburst[i] = 2'b00;
assign m_axi_awlock[i] = 2'b00;
assign m_axi_awcache[i] = 4'b0000;
@ -166,10 +180,10 @@ module VX_axi_adapter #(
// AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
// req_bank_off counts DATA_WIDTH-sized words, while AXI ARADDR is a byte address:
// scale by DATA_SIZE (= CLOG2(DATA_WIDTH/8)), as the address-width assert on DST_ADDR_WDITH expects.
assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off) << DATA_SIZE;
assign m_axi_arid[i] = mem_req_tag;
assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(AXSIZE);
assign m_axi_arsize[i] = 3'(DATA_SIZE);
assign m_axi_arburst[i] = 2'b00;
assign m_axi_arlock[i] = 2'b00;
assign m_axi_arcache[i] = 4'b0000;

View file

@ -53,8 +53,6 @@ module VX_mem_adapter #(
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
output wire mem_rsp_ready_out
);
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH);
@ -74,6 +72,7 @@ module VX_mem_adapter #(
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
wire mem_rsp_ready_in_w;
`UNUSED_VAR (mem_req_tag_in)
`UNUSED_VAR (mem_rsp_tag_out)
if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data
@ -122,7 +121,7 @@ module VX_mem_adapter #(
assign mem_rsp_valid_in_w = mem_rsp_valid_out;
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]);
assign mem_rsp_ready_out = mem_rsp_ready_in_w;
end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data

View file

@ -7,17 +7,21 @@ include ../../common.mk
# AFU parameters
CONFIGS += -DNOPAE
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
endif
#CONFIGS += -DNUM_CORES=2

View file

@ -98,7 +98,7 @@ ifdef PERF
endif
# ast dump flags
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
all: swconfig ip-gen setup build

View file

@ -35,6 +35,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8
#define VX_CAPS_MEM_BANK_SIZE 0x9
// device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -163,11 +163,6 @@ public:
});
{
// retrieve FPGA global memory size
CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), {
global_mem_size_ = GLOBAL_MEM_SIZE;
});
// Load ISA CAPS
CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), {
api_.fpgaClose(fpga_);
@ -179,6 +174,12 @@ public:
api_.fpgaClose(fpga_);
return -1;
});
// Determine global memory size
uint64_t num_banks, bank_size;
this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
global_mem_size_ = num_banks * bank_size;
}
#ifdef SCOPE
@ -231,7 +232,10 @@ public:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
// dev_caps_[50:48] is a 3-bit exponent: the number of memory banks is 2^exponent.
_value = 1 << ((dev_caps_ >> 48) & 0x7);
break;
case VX_CAPS_MEM_BANK_SIZE:
// dev_caps_[55:51] is a 5-bit field biased by 16: the bank size is 2^(16 + field).
// NOTE(review): presumably a size in bytes (the AFU packs a byte-address width minus 16) - confirm.
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);

View file

@ -80,6 +80,9 @@ public:
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
// Split the 2^MEM_ADDR_WIDTH address space evenly across the banks.
// Dividing the address width itself by the bank count shrinks the exponent
// (e.g. 32 bits / 2 banks -> 2^16) and is only correct for a single bank.
// Assumes MEM_ADDR_WIDTH < 64 so the shift is well defined - TODO confirm.
_value = (1ull << MEM_ADDR_WIDTH) / MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();

View file

@ -84,6 +84,9 @@ public:
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
// Split the 2^MEM_ADDR_WIDTH address space evenly across the banks.
// Dividing the address width itself by the bank count shrinks the exponent
// (e.g. 32 bits / 2 banks -> 2^16) and is only correct for a single bank.
// Assumes MEM_ADDR_WIDTH < 64 so the shift is well defined - TODO confirm.
_value = (1ull << MEM_ADDR_WIDTH) / MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();

View file

@ -8,6 +8,7 @@ SRC_DIR := $(VORTEX_HOME)/runtime/xrt
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common
CXXFLAGS += -DXLEN_$(XLEN)
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread

View file

@ -49,7 +49,6 @@ using namespace vortex;
#define MMIO_ISA_ADDR 0x1C
#define MMIO_DCR_ADDR 0x28
#define MMIO_SCP_ADDR 0x34
#define MMIO_MEM_ADDR 0x40
#define CTL_AP_START (1 << 0)
#define CTL_AP_DONE (1 << 1)
@ -58,24 +57,6 @@ using namespace vortex;
#define CTL_AP_RESET (1 << 4)
#define CTL_AP_RESTART (1 << 7)
struct platform_info_t {
const char *prefix_name;
uint8_t lg2_num_banks;
uint8_t lg2_bank_size;
uint64_t mem_base;
};
static const platform_info_t g_platforms[] = {
{"vortex_xrtsim", 0, 32, 0x0}, // 16 x 256 MB = 4 GB
{"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
{"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
{"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2
{"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4
{"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4
};
#ifdef CPP_API
typedef xrt::device xrt_device_t;
@ -113,18 +94,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) {
}
#endif
static int get_platform_info(const std::string &device_name,
platform_info_t *platform_info) {
for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) {
auto &platform = g_platforms[i];
if (device_name.rfind(platform.prefix_name, 0) == 0) {
*platform_info = platform;
return 0;
}
}
return -1;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device {
@ -181,58 +150,6 @@ public:
auto xclbin = xrt::xclbin(xlbin_path_s);
auto device_name = xrtDevice.get_info<xrt::info::device::name>();
/*{
uint32_t num_banks = 0;
uint64_t bank_size = 0;
uint64_t mem_base = 0;
auto mem_json =
nlohmann::json::parse(xrtDevice.get_info<xrt::info::device::memory>()); if
(!mem_json.is_null()) { uint32_t index = 0; for (auto& mem :
mem_json["board"]["memory"]["memories"]) { auto enabled =
mem["enabled"].get<std::string>(); if (enabled == "true") { if (index == 0)
{ mem_base = std::stoull(mem["base_address"].get<std::string>(), nullptr,
16); bank_size = std::stoull(mem["range_bytes"].get<std::string>(), nullptr,
16);
}
++index;
}
}
num_banks = index;
}
fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx,
count=%d\n", mem_base, bank_size, num_banks);
}*/
/*{
std::cout << "Device" << device_index << " : " <<
xrtDevice.get_info<xrt::info::device::name>() << std::endl; std::cout << "
bdf : " << xrtDevice.get_info<xrt::info::device::bdf>() << std::endl;
std::cout << " kdma : " <<
xrtDevice.get_info<xrt::info::device::kdma>() << std::endl; std::cout << "
max_freq : " <<
xrtDevice.get_info<xrt::info::device::max_clock_frequency_mhz>() <<
std::endl; std::cout << " memory : " <<
xrtDevice.get_info<xrt::info::device::memory>() << std::endl; std::cout << "
thermal : " << xrtDevice.get_info<xrt::info::device::thermal>() <<
std::endl; std::cout << " m2m : " << std::boolalpha <<
xrtDevice.get_info<xrt::info::device::m2m>() << std::dec << std::endl;
std::cout << " nodma : " << std::boolalpha <<
xrtDevice.get_info<xrt::info::device::nodma>() << std::dec << std::endl;
std::cout << "Memory info :" << std::endl;
for (const auto& mem_bank : xclbin.get_mems()) {
std::cout << " index : " << mem_bank.get_index() << std::endl;
std::cout << " tag : " << mem_bank.get_tag() << std::endl;
std::cout << " type : " << (int)mem_bank.get_type() << std::endl;
std::cout << " base_address : 0x" << std::hex <<
mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" <<
(mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << "
used :" << mem_bank.get_used() << std::endl;
}
}*/
#else
CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), {
@ -275,11 +192,6 @@ public:
printf("info: device name=%s.\n", device_name.c_str());
CHECK_ERR(get_platform_info(device_name, &platform_), {
fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str());
return err;
});
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), {
return err;
});
@ -300,36 +212,13 @@ public:
return err;
});
uint32_t num_banks = 1 << platform_.lg2_num_banks;
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
uint64_t num_banks;
this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
lg2_num_banks_ = log2ceil(num_banks);
// adjust memory banks allocation to architecture limit
int isa_arch = VX_ISA_ARCH(isa_caps_);
if (isa_arch == 32) {
uint64_t max_mem_size = 1ull << 32;
uint32_t need_num_banks = max_mem_size / bank_size;
if (num_banks > need_num_banks) {
printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks);
num_banks = need_num_banks;
platform_.lg2_num_banks = log2ceil(num_banks);
}
}
for (uint32_t i = 0; i < num_banks; ++i) {
uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12);
uint64_t reg_value = platform_.mem_base + i * bank_size;
CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), {
return err;
});
CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), {
return err;
});
#ifndef BANK_INTERLEAVE
break;
#endif
}
uint64_t bank_size;
this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
lg2_bank_size_ = log2ceil(bank_size);
global_mem_size_ = num_banks * bank_size;
@ -418,7 +307,10 @@ public:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
_value = 1 << ((dev_caps_ >> 48) & 0x7);
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
@ -734,23 +626,23 @@ private:
MemoryAllocator global_mem_;
xrt_device_t xrtDevice_;
xrt_kernel_t xrtKernel_;
platform_info_t platform_;
uint64_t dev_caps_;
uint64_t isa_caps_;
uint64_t global_mem_size_;
DeviceConfig dcrs_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
uint32_t lg2_num_banks_;
uint32_t lg2_bank_size_;
#ifdef BANK_INTERLEAVE
std::vector<xrt_buffer_t> xrtBuffers_;
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
uint32_t num_banks = 1 << platform_.lg2_num_banks;
uint32_t num_banks = 1 << lg2_num_banks_;
uint64_t block_addr = addr / CACHE_BLOCK_SIZE;
uint32_t index = block_addr & (num_banks - 1);
uint64_t offset =
(block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE;
uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE;
if (pIdx) {
*pIdx = index;
}
@ -778,9 +670,9 @@ private:
std::unordered_map<uint32_t, buf_cnt_t> xrtBuffers_;
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
uint32_t num_banks = 1 << platform_.lg2_num_banks;
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
uint32_t index = addr >> platform_.lg2_bank_size;
uint32_t num_banks = 1 << lg2_num_banks_;
uint64_t bank_size = 1ull << lg2_bank_size_;
uint32_t index = addr >> lg2_bank_size_;
uint64_t offset = addr & (bank_size - 1);
if (index > num_banks) {
fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr);
@ -807,7 +699,7 @@ private:
}
} else {
printf("allocating bank%d...\n", bank_id);
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
uint64_t bank_size = 1ull << lg2_bank_size_;
#ifdef CPP_API
xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id);
#else

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,30 +20,58 @@ constexpr uint32_t count_leading_zeros(uint32_t value) {
return value ? __builtin_clz(value) : 32;
}
// Counts the zero bits above the most significant set bit of a 64-bit value.
// A zero input reports the full width (64).
constexpr uint32_t count_leading_zeros(uint64_t value) {
  // the builtin has no defined result for 0, so answer that case directly
  if (value == 0) {
    return 64;
  }
  return __builtin_clzll(value);
}
// Counts the zero bits below the least significant set bit of a 32-bit value.
// A zero input reports the full width (32).
constexpr uint32_t count_trailing_zeros(uint32_t value) {
  // the builtin has no defined result for 0, so answer that case directly
  if (value == 0) {
    return 32;
  }
  return __builtin_ctz(value);
}
// Counts the zero bits below the least significant set bit of a 64-bit value.
// A zero input reports the full width (64).
constexpr uint32_t count_trailing_zeros(uint64_t value) {
  // the builtin has no defined result for 0, so answer that case directly
  if (value == 0) {
    return 64;
  }
  return __builtin_ctzll(value);
}
// Tells whether a 32-bit value has exactly one bit set (i.e. is a power of two).
// Zero is not a power of two.
constexpr bool ispow2(uint32_t value) {
  // clearing the lowest set bit leaves nothing only when a single bit was set
  return (value != 0) && ((value & (value - 1)) == 0);
}
// Tells whether a 64-bit value has exactly one bit set (i.e. is a power of two).
// Zero is not a power of two.
constexpr bool ispow2(uint64_t value) {
  // clearing the lowest set bit leaves nothing only when a single bit was set
  return (value != 0) && ((value & (value - 1)) == 0);
}
// Smallest exponent e such that (1 << e) >= value, for a 32-bit value.
// Computed as the bit width of (value - 1); value == 1 yields 0.
constexpr uint32_t log2ceil(uint32_t value) {
  const uint32_t pred = value - 1; // for value == 0 this wraps to all-ones
  return 32 - count_leading_zeros(pred);
}
// Smallest exponent e such that (1 << e) >= value, for a 64-bit value.
// Computed as the bit width of (value - 1); value == 1 yields 0.
constexpr uint32_t log2ceil(uint64_t value) {
  const uint64_t pred = value - 1; // for value == 0 this wraps to all-ones
  return 64 - count_leading_zeros(pred);
}
// Same as log2ceil for a 32-bit value, but never returns less than 1.
inline unsigned log2up(uint32_t value) {
  const uint32_t bits = log2ceil(value);
  // clamp the result to a minimum of one
  return (bits < 1) ? 1 : bits;
}
// Same as log2ceil for a 64-bit value, but never returns less than 1.
inline unsigned log2up(uint64_t value) {
  const uint32_t bits = log2ceil(value);
  // clamp the result to a minimum of one
  return (bits < 1) ? 1 : bits;
}
// Index of the most significant set bit of a 32-bit value (floor of log2).
// NOTE: for value == 0 the helper reports 32 leading zeros, so the unsigned
// subtraction below wraps around; callers should pass a non-zero value.
constexpr unsigned log2floor(uint32_t value) {
  const uint32_t leading = count_leading_zeros(value);
  return 31 - leading;
}
// Index of the most significant set bit of a 64-bit value (floor of log2).
// NOTE: for value == 0 the helper reports 64 leading zeros, so the unsigned
// subtraction below wraps around; callers should pass a non-zero value.
constexpr unsigned log2floor(uint64_t value) {
  const uint32_t leading = count_leading_zeros(value);
  return 63 - leading;
}
// Number of bits needed to represent a 32-bit value: the position just above
// its most significant set bit. A zero input yields 0.
constexpr unsigned ceil2(uint32_t value) {
  const uint32_t leading = count_leading_zeros(value);
  return 32 - leading;
}
// Number of bits needed to represent a 64-bit value: the position just above
// its most significant set bit. A zero input yields 0.
constexpr unsigned ceil2(uint64_t value) {
  const uint32_t leading = count_leading_zeros(value);
  return 64 - leading;
}
inline uint64_t bit_clr(uint64_t bits, uint32_t index) {
assert(index <= 63);
return bits & ~(1ull << index);
@ -86,7 +114,7 @@ template <typename T = uint32_t>
T sext(const T& word, uint32_t width) {
assert(width > 1);
assert(width <= (sizeof(T) * 8));
if (width == (sizeof(T) * 8))
if (width == (sizeof(T) * 8))
return word;
T mask((static_cast<T>(1) << width) - 1);
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : (word & mask);
@ -96,7 +124,7 @@ template <typename T = uint32_t>
T zext(const T& word, uint32_t width) {
assert(width > 1);
assert(width <= (sizeof(T) * 8));
if (width == (sizeof(T) * 8))
if (width == (sizeof(T) * 8))
return word;
T mask((static_cast<T>(1) << width) - 1);
return word & mask;

View file

@ -71,13 +71,14 @@ public:
// Check if the reservation is within memory capacity bounds
if (addr + size > capacity_) {
printf("error: address range out of bounds\n");
printf("error: address range out of bounds - requested=0x%lx, capacity=0x%lx\n", (addr + size), capacity_);
return -1;
}
// Ensure the reservation does not overlap with existing pages
if (hasPageOverlap(addr, size)) {
printf("error: address range overlaps with existing allocation\n");
uint64_t overlapStart, overlapEnd;
if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) {
printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd);
return -1;
}
@ -509,15 +510,15 @@ private:
return false;
}
bool hasPageOverlap(uint64_t start, uint64_t size) {
bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) {
page_t* current = pages_;
while (current != nullptr) {
uint64_t pageStart = current->addr;
uint64_t pageEnd = pageStart + current->size;
uint64_t requestEnd = start + size;
if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page
(requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page
(start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page
uint64_t end = start + size;
if ((start <= pageEnd) && (end >= pageStart)) {
*overlapStart = pageStart;
*overlapEnd = pageEnd;
return true;
}
current = current->next;

View file

@ -32,18 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)

View file

@ -35,7 +35,7 @@
#include <unordered_map>
#include <util.h>
#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8)
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
@ -145,6 +145,9 @@ public:
// allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE);
// calculate memory bank size
mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE;
// reset the device
this->reset();
@ -406,14 +409,14 @@ private:
}
void avs_bus_reset() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
device_->avs_readdatavalid[b] = 0;
device_->avs_waitrequest[b] = 0;
}
}
void avs_bus_eval() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// process memory responses
device_->avs_readdatavalid[b] = 0;
if (!pending_mem_reqs_[b].empty()
@ -421,7 +424,7 @@ private:
auto mem_rd_it = pending_mem_reqs_[b].begin();
auto mem_req = *mem_rd_it;
device_->avs_readdatavalid[b] = 1;
memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE);
uint32_t addr = mem_req->addr;
pending_mem_reqs_[b].erase(mem_rd_it);
delete mem_req;
@ -429,19 +432,20 @@ private:
// process memory requests
assert(!device_->avs_read[b] || !device_->avs_write[b]);
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE;
uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE;
if (device_->avs_write[b]) {
// process write request
uint64_t byteen = device_->avs_byteenable[b];
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) {
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr);
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) {
printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]);
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
}
printf("\n");*/
@ -455,22 +459,20 @@ private:
dram_queue_.push(mem_req);
} else
if (device_->avs_read[b]) {
// process read request
auto mem_req = new mem_req_t();
mem_req->addr = device_->avs_address[b];
mem_req->bank_id = b;
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
for (auto& req : pending_mem_reqs_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
else
printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
}
printf("}\n");*/
printf("\n");*/
// send dram request
dram_queue_.push(mem_req);
@ -481,7 +483,7 @@ private:
}
typedef struct {
std::array<uint8_t, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE> data;
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
uint32_t addr;
uint32_t bank_id;
bool write;
@ -514,9 +516,10 @@ private:
bool stop_;
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_;
uint64_t host_buffer_ids_;
uint64_t mem_bank_size_;
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_;

View file

@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
output t_ccip_mmioData af2cp_sTxPort_c2_data,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS],
output logic avs_write [`PLATFORM_MEMORY_BANKS],
output logic avs_read [`PLATFORM_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_MEMORY_BANKS]
);
t_if_ccip_Rx cp2af_sRxPort;
t_if_ccip_Tx af2cp_sTxPort;
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS)
) afu (
.clk(clk),
.reset(reset),

View file

@ -32,14 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters
ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_NUM_BANKS=1
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif
ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif
endif
ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_OFFSET=0
endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)

View file

@ -11,22 +11,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`include "vortex_afu.vh"
module vortex_afu_shim #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
) (
// System signals
input wire ap_clk,
input wire ap_rst_n,
// AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid,
@ -50,35 +50,38 @@ module vortex_afu_shim #(
output wire interrupt
`IGNORE_WARNINGS_END
);
vortex_afu #(
.C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH),
.C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH)
) afu (
.ap_clk(ap_clk),
.ap_rst_n(ap_rst_n),
// AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
.s_axi_ctrl_awvalid(s_axi_ctrl_awvalid),
.s_axi_ctrl_awready(s_axi_ctrl_awready),
.s_axi_ctrl_awaddr(s_axi_ctrl_awaddr),
.s_axi_ctrl_wvalid(s_axi_ctrl_wvalid),
.s_axi_ctrl_wready(s_axi_ctrl_wready),
.s_axi_ctrl_wdata(s_axi_ctrl_wdata),
.s_axi_ctrl_wstrb(s_axi_ctrl_wstrb),
.s_axi_ctrl_arvalid(s_axi_ctrl_arvalid),
.s_axi_ctrl_arready(s_axi_ctrl_arready),
.s_axi_ctrl_araddr(s_axi_ctrl_araddr),
.s_axi_ctrl_rvalid(s_axi_ctrl_rvalid),
.s_axi_ctrl_rready(s_axi_ctrl_rready),
.s_axi_ctrl_rdata(s_axi_ctrl_rdata),
.s_axi_ctrl_rresp(s_axi_ctrl_rresp),
.s_axi_ctrl_bvalid(s_axi_ctrl_bvalid),
.s_axi_ctrl_bready(s_axi_ctrl_bready),
.s_axi_ctrl_bresp(s_axi_ctrl_bresp),
.interrupt(interrupt)
);
VX_afu_wrap #(
.C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) afu_wrap (
.clk (ap_clk),
.reset (~ap_rst_n),
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
.s_axi_ctrl_awready (s_axi_ctrl_awready),
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
.s_axi_ctrl_wvalid (s_axi_ctrl_wvalid),
.s_axi_ctrl_wready (s_axi_ctrl_wready),
.s_axi_ctrl_wdata (s_axi_ctrl_wdata),
.s_axi_ctrl_wstrb (s_axi_ctrl_wstrb),
.s_axi_ctrl_arvalid (s_axi_ctrl_arvalid),
.s_axi_ctrl_arready (s_axi_ctrl_arready),
.s_axi_ctrl_araddr (s_axi_ctrl_araddr),
.s_axi_ctrl_rvalid (s_axi_ctrl_rvalid),
.s_axi_ctrl_rready (s_axi_ctrl_rready),
.s_axi_ctrl_rdata (s_axi_ctrl_rdata),
.s_axi_ctrl_rresp (s_axi_ctrl_rresp),
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
.s_axi_ctrl_bready (s_axi_ctrl_bready),
.s_axi_ctrl_bresp (s_axi_ctrl_bresp),
.interrupt (interrupt)
);
endmodule

View file

@ -37,7 +37,7 @@
#include <iostream>
#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8)
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
@ -59,10 +59,24 @@
#define RAM_PAGE_SIZE 4096
#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH)
#define CPU_GPU_LATENCY 200
// Storage type for the memory address signals, chosen from the configured
// address width: a 64-bit word above 32 bits, a 32-bit word otherwise.
#if PLATFORM_MEMORY_ADDR_WIDTH > 32
using Vl_m_addr_t = QData;
#else
using Vl_m_addr_t = IData;
#endif

// Storage type for the memory data signals, chosen from the configured data
// width: a wide array of 32-bit words above 64 bits, a 64-bit word above
// 32 bits, and a 32-bit word otherwise.
#if PLATFORM_MEMORY_DATA_WIDTH > 64
using Vl_m_data_t = VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)>;
#elif PLATFORM_MEMORY_DATA_WIDTH > 32
using Vl_m_data_t = QData;
#else
using Vl_m_data_t = IData;
#endif
using namespace vortex;
static uint64_t timestamp = 0;
@ -134,7 +148,7 @@ public:
if (future_.valid()) {
future_.wait();
}
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
delete mem_alloc_[i];
}
if (ram_) {
@ -169,15 +183,18 @@ public:
tfp_->open("trace.vcd");
#endif
// calculate memory bank size
mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE;
// allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE);
// initialize AXI memory interfaces
MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS);
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
// initialize memory allocator
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64);
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
}
// reset the device
@ -198,13 +215,13 @@ public:
}
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
if (bank_id >= M_AXI_MEM_NUM_BANKS)
if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1;
return mem_alloc_[bank_id]->allocate(size, addr);
}
int mem_free(uint32_t bank_id, uint64_t addr) {
if (bank_id >= M_AXI_MEM_NUM_BANKS)
if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1;
return mem_alloc_[bank_id]->release(addr);
}
@ -212,11 +229,11 @@ public:
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
std::lock_guard<std::mutex> guard(mutex_);
if (bank_id >= M_AXI_MEM_NUM_BANKS)
if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->write(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((const uint8_t*)data)[i]);
}
@ -227,11 +244,11 @@ public:
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
std::lock_guard<std::mutex> guard(mutex_);
if (bank_id >= M_AXI_MEM_NUM_BANKS)
if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->read(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((uint8_t*)data)[i]);
}
@ -307,7 +324,7 @@ private:
reqs.clear();
}
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
std::queue<mem_req_t*> empty;
std::swap(dram_queues_[i], empty);
}
@ -334,7 +351,7 @@ private:
void tick() {
this->axi_mem_bus_eval();
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
if (!dram_queues_[i].empty()) {
auto mem_req = dram_queues_[i].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
@ -394,7 +411,7 @@ private:
}
void axi_mem_bus_reset() {
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// address read request
*m_axi_mem_[i].arready = 1;
@ -418,7 +435,7 @@ private:
}
void axi_mem_bus_eval() {
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// handle read responses
if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) {
*m_axi_mem_[i].rvalid = 0;
@ -434,7 +451,7 @@ private:
*m_axi_mem_[i].rid = mem_rsp->tag;
*m_axi_mem_[i].rresp = 0;
*m_axi_mem_[i].rlast = 1;
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE);
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
pending_mem_reqs_[i].erase(mem_rsp_it);
m_axi_states_[i].read_rsp_pending = true;
delete mem_rsp;
@ -465,14 +482,14 @@ private:
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
auto mem_req = new mem_req_t();
mem_req->tag = *m_axi_mem_[i].arid;
mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE);
mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE;
ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_[i].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
}
printf("\n");*/
@ -494,9 +511,9 @@ private:
auto byteen = *m_axi_mem_[i].wstrb;
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE;
for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) {
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
@ -510,7 +527,7 @@ private:
pending_mem_reqs_[i].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
}
printf("\n");*/
@ -535,7 +552,7 @@ private:
} m_axi_state_t;
typedef struct {
std::array<uint8_t, M_AXI_MEM_DATA_SIZE> data;
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
uint32_t tag;
uint64_t addr;
bool write;
@ -545,22 +562,22 @@ private:
typedef struct {
CData* awvalid;
CData* awready;
QData* awaddr;
Vl_m_addr_t* awaddr;
IData* awid;
CData* awlen;
CData* wvalid;
CData* wready;
VlWide<16>* wdata;
Vl_m_data_t* wdata;
QData* wstrb;
CData* wlast;
CData* arvalid;
CData* arready;
QData* araddr;
Vl_m_addr_t* araddr;
IData* arid;
CData* arlen;
CData* rvalid;
CData* rready;
VlWide<16>* rdata;
Vl_m_data_t* rdata;
CData* rlast;
IData* rid;
CData* rresp;
@ -573,21 +590,22 @@ private:
Vvortex_afu_shim* device_;
RAM* ram_;
DramSim dram_sim_;
uint64_t mem_bank_size_;
std::future<void> future_;
bool stop_;
std::mutex mutex_;
std::list<mem_req_t*> pending_mem_reqs_[M_AXI_MEM_NUM_BANKS];
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS];
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS];
MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS];
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS];
m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS];
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS];
std::queue<mem_req_t*> dram_queues_[M_AXI_MEM_NUM_BANKS];
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_BANKS];
#ifdef VCD_OUTPUT
VerilatedVcdC* tfp_;