mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
opaesim and xrtsim multi-bank memory support
This commit is contained in:
parent
00feb8b424
commit
b8199decf4
27 changed files with 488 additions and 464 deletions
|
@ -273,10 +273,11 @@ config2()
|
|||
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
|
||||
# test single-bank DRAM
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
module Vortex_axi import VX_gpu_pkg::*; #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
)(
|
||||
|
@ -82,11 +82,10 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
// Status
|
||||
output wire busy
|
||||
);
|
||||
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
|
||||
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
|
||||
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
|
||||
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH);
|
||||
|
||||
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
|
@ -101,94 +100,6 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
|
||||
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding
|
||||
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
|
||||
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
|
||||
|
||||
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
|
||||
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
|
||||
|
||||
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
|
||||
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
|
||||
end
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid),
|
||||
.mem_req_rw (mem_req_rw),
|
||||
.mem_req_byteen (mem_req_byteen),
|
||||
.mem_req_addr (mem_req_addr),
|
||||
.mem_req_data (mem_req_data),
|
||||
.mem_req_tag (mem_req_tag),
|
||||
.mem_req_ready (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data),
|
||||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr_unqual),
|
||||
.m_axi_awid (m_axi_awid_unqual),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid_unqual),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr_unqual),
|
||||
.m_axi_arid (m_axi_arid_unqual),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast) ,
|
||||
.m_axi_rid (m_axi_rid_unqual),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
Vortex vortex (
|
||||
|
@ -217,4 +128,127 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.busy (busy)
|
||||
);
|
||||
|
||||
wire mem_req_valid_a;
|
||||
wire mem_req_rw_a;
|
||||
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
|
||||
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
|
||||
wire [AXI_TID_WIDTH-1:0] mem_req_tag_a;
|
||||
wire mem_req_ready_a;
|
||||
|
||||
wire mem_rsp_valid_a;
|
||||
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
|
||||
wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a;
|
||||
wire mem_rsp_ready_a;
|
||||
|
||||
VX_mem_adapter #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
|
||||
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.DST_TAG_WIDTH (AXI_TID_WIDTH),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (0)
|
||||
) mem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (mem_req_valid),
|
||||
.mem_req_addr_in (mem_req_addr),
|
||||
.mem_req_rw_in (mem_req_rw),
|
||||
.mem_req_byteen_in (mem_req_byteen),
|
||||
.mem_req_data_in (mem_req_data),
|
||||
.mem_req_tag_in (mem_req_tag),
|
||||
.mem_req_ready_in (mem_req_ready),
|
||||
|
||||
.mem_rsp_valid_in (mem_rsp_valid),
|
||||
.mem_rsp_data_in (mem_rsp_data),
|
||||
.mem_rsp_tag_in (mem_rsp_tag),
|
||||
.mem_rsp_ready_in (mem_rsp_ready),
|
||||
|
||||
.mem_req_valid_out (mem_req_valid_a),
|
||||
.mem_req_addr_out (mem_req_addr_a),
|
||||
.mem_req_rw_out (mem_req_rw_a),
|
||||
.mem_req_byteen_out (mem_req_byteen_a),
|
||||
.mem_req_data_out (mem_req_data_a),
|
||||
.mem_req_tag_out (mem_req_tag_a),
|
||||
.mem_req_ready_out (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid_out (mem_rsp_valid_a),
|
||||
.mem_rsp_data_out (mem_rsp_data_a),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_a),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_a)
|
||||
);
|
||||
|
||||
VX_axi_adapter #(
|
||||
.DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
|
||||
.TAG_WIDTH (AXI_TID_WIDTH),
|
||||
.NUM_BANKS (AXI_NUM_BANKS),
|
||||
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
|
||||
.BANK_INTERLEAVE (0),
|
||||
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid (mem_req_valid_a),
|
||||
.mem_req_rw (mem_req_rw_a),
|
||||
.mem_req_byteen (mem_req_byteen_a),
|
||||
.mem_req_addr (mem_req_addr_a),
|
||||
.mem_req_data (mem_req_data_a),
|
||||
.mem_req_tag (mem_req_tag_a),
|
||||
.mem_req_ready (mem_req_ready_a),
|
||||
|
||||
.mem_rsp_valid (mem_rsp_valid_a),
|
||||
.mem_rsp_data (mem_rsp_data_a),
|
||||
.mem_rsp_tag (mem_rsp_tag_a),
|
||||
.mem_rsp_ready (mem_rsp_ready_a),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
.m_axi_awsize (m_axi_awsize),
|
||||
.m_axi_awburst (m_axi_awburst),
|
||||
.m_axi_awlock (m_axi_awlock),
|
||||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awregion (m_axi_awregion),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
.m_axi_arsize (m_axi_arsize),
|
||||
.m_axi_arburst (m_axi_arburst),
|
||||
.m_axi_arlock (m_axi_arlock),
|
||||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arregion (m_axi_arregion),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rresp (m_axi_rresp)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -30,7 +30,17 @@
|
|||
|
||||
//`include "platform_afu_top_config.vh"
|
||||
|
||||
`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
|
||||
`endif
|
||||
|
||||
package local_mem_cfg_pkg;
|
||||
|
||||
|
@ -57,5 +67,3 @@ package local_mem_cfg_pkg;
|
|||
typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask;
|
||||
|
||||
endpackage // local_mem_cfg_pkg
|
||||
|
||||
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
|
|
|
@ -42,7 +42,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
);
|
||||
localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data);
|
||||
localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8;
|
||||
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH));
|
||||
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
|
||||
|
||||
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
|
||||
|
@ -96,9 +96,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
localparam STATE_DCR_WRITE = 4;
|
||||
localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1);
|
||||
|
||||
localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + `CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8);
|
||||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(BANK_BYTE_ADDR_WIDTH-16),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -601,6 +605,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.TAG_WIDTH (AVS_REQ_TAGW + 1),
|
||||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
|
||||
.AVS_ADDR_WIDTH($bits(t_local_mem_addr)),
|
||||
.BANK_INTERLEAVE (1),
|
||||
.REQ_OUT_BUF (2),
|
||||
.RSP_OUT_BUF (0)
|
||||
) avs_adapter (
|
||||
|
|
|
@ -14,21 +14,21 @@
|
|||
`include "vortex_afu.vh"
|
||||
|
||||
module VX_afu_ctrl #(
|
||||
parameter AXI_ADDR_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = 32,
|
||||
parameter AXI_NUM_BANKS = 1
|
||||
parameter S_AXI_ADDR_WIDTH = 8,
|
||||
parameter S_AXI_DATA_WIDTH = 32,
|
||||
parameter M_AXI_ADDR_WIDTH = 25
|
||||
) (
|
||||
// axi4 lite slave signals
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire s_axi_awvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
|
||||
output wire s_axi_awready,
|
||||
|
||||
input wire s_axi_wvalid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
|
||||
input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata,
|
||||
input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb,
|
||||
output wire s_axi_wready,
|
||||
|
||||
output wire s_axi_bvalid,
|
||||
|
@ -36,11 +36,11 @@ module VX_afu_ctrl #(
|
|||
input wire s_axi_bready,
|
||||
|
||||
input wire s_axi_arvalid,
|
||||
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr,
|
||||
output wire s_axi_arready,
|
||||
|
||||
output wire s_axi_rvalid,
|
||||
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata,
|
||||
output wire [1:0] s_axi_rresp,
|
||||
input wire s_axi_rready,
|
||||
|
||||
|
@ -56,8 +56,6 @@ module VX_afu_ctrl #(
|
|||
output wire scope_bus_out,
|
||||
`endif
|
||||
|
||||
output wire [63:0] mem_base [AXI_NUM_BANKS],
|
||||
|
||||
output wire dcr_wr_valid,
|
||||
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
|
||||
|
@ -125,10 +123,6 @@ module VX_afu_ctrl #(
|
|||
//ADDR_SCP_CTRL = 8'h3C,
|
||||
`endif
|
||||
|
||||
ADDR_MEM_0 = 8'h40,
|
||||
ADDR_MEM_1 = 8'h44,
|
||||
//ADDR_MEM_CTRL = 8'h48,
|
||||
|
||||
ADDR_BITS = 8;
|
||||
|
||||
localparam
|
||||
|
@ -144,7 +138,9 @@ module VX_afu_ctrl #(
|
|||
RSTATE_WIDTH = 2;
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {16'b0,
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(M_AXI_ADDR_WIDTH-16),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
@ -174,7 +170,6 @@ module VX_afu_ctrl #(
|
|||
reg gie_r;
|
||||
reg [1:0] ier_r;
|
||||
reg [1:0] isr_r;
|
||||
reg [63:0] mem_r [AXI_NUM_BANKS];
|
||||
reg [31:0] dcra_r;
|
||||
reg [31:0] dcrv_r;
|
||||
reg dcr_wr_valid_r;
|
||||
|
@ -311,10 +306,6 @@ module VX_afu_ctrl #(
|
|||
dcra_r <= '0;
|
||||
dcrv_r <= '0;
|
||||
dcr_wr_valid_r <= 0;
|
||||
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
mem_r[i] <= '0;
|
||||
end
|
||||
end else begin
|
||||
dcr_wr_valid_r <= 0;
|
||||
ap_reset_r <= 0;
|
||||
|
@ -353,16 +344,7 @@ module VX_afu_ctrl #(
|
|||
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
|
||||
dcr_wr_valid_r <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
|
||||
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
|
||||
end
|
||||
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
|
||||
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
||||
if (ier_r[0] & ap_done)
|
||||
|
@ -453,8 +435,6 @@ module VX_afu_ctrl #(
|
|||
assign ap_start = ap_start_r;
|
||||
assign interrupt = gie_r & (| isr_r);
|
||||
|
||||
assign mem_base = mem_r;
|
||||
|
||||
assign dcr_wr_valid = dcr_wr_valid_r;
|
||||
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
|
||||
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);
|
||||
|
|
|
@ -16,16 +16,17 @@
|
|||
module VX_afu_wrap #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = 512,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 2
|
||||
) (
|
||||
// System signals
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -48,7 +49,6 @@ module VX_afu_wrap #(
|
|||
|
||||
output wire interrupt
|
||||
);
|
||||
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_RUN = 1;
|
||||
|
@ -80,7 +80,7 @@ module VX_afu_wrap #(
|
|||
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
// convert memory interface to array
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [15:0] vx_pending_writes;
|
||||
|
@ -88,8 +88,6 @@ module VX_afu_wrap #(
|
|||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
wire dcr_wr_valid;
|
||||
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
|
||||
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
|
||||
|
@ -181,9 +179,9 @@ module VX_afu_wrap #(
|
|||
end
|
||||
|
||||
VX_afu_ctrl #(
|
||||
.AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
.S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH)
|
||||
) afu_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -218,26 +216,24 @@ module VX_afu_wrap #(
|
|||
.scope_bus_out (scope_bus_in),
|
||||
`endif
|
||||
|
||||
.mem_base (mem_base),
|
||||
|
||||
.dcr_wr_valid (dcr_wr_valid),
|
||||
.dcr_wr_addr (dcr_wr_addr),
|
||||
.dcr_wr_data (dcr_wr_data)
|
||||
);
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
|
||||
assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
|
||||
end
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
|
||||
Vortex_axi #(
|
||||
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) vortex_axi (
|
||||
|
|
|
@ -16,16 +16,17 @@
|
|||
module vortex_afu #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -54,12 +55,13 @@ module vortex_afu #(
|
|||
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH)
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) afu_wrap (
|
||||
.clk (ap_clk),
|
||||
.reset (~ap_rst_n),
|
||||
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
|
|
|
@ -14,20 +14,24 @@
|
|||
`ifndef VORTEX_AFU_VH
|
||||
`define VORTEX_AFU_VH
|
||||
|
||||
`ifndef M_AXI_MEM_NUM_BANKS
|
||||
`define M_AXI_MEM_NUM_BANKS 1
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_ADDR_WIDTH
|
||||
`define M_AXI_MEM_ADDR_WIDTH 34
|
||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 25
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_DATA_WIDTH
|
||||
`define M_AXI_MEM_DATA_WIDTH 512
|
||||
`ifndef PLATFORM_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_MEMORY_DATA_WIDTH 512
|
||||
`endif
|
||||
|
||||
`ifndef M_AXI_MEM_ID_WIDTH
|
||||
`define M_AXI_MEM_ID_WIDTH 32
|
||||
// Constant offset applied to memory-bank AXI addresses; defaults to 0 (no
// offset) unless the build defines it. Elsewhere in this change it is added
// to each bank's AXI read/write address in VX_afu_wrap.
`ifndef PLATFORM_MEMORY_OFFSET
|
||||
`define PLATFORM_MEMORY_OFFSET 0
|
||||
`endif
|
||||
|
||||
// Default width of the AXI memory-port ID signals; used as the default value
// of the C_M_AXI_MEM_ID_WIDTH parameter of the XRT vortex_afu module.
`ifndef PLATFORM_MEMORY_ID_WIDTH
|
||||
`define PLATFORM_MEMORY_ID_WIDTH 32
|
||||
`endif
|
||||
|
||||
`define GEN_AXI_MEM(i) \
|
||||
|
|
|
@ -21,6 +21,8 @@ module VX_avs_adapter #(
|
|||
parameter NUM_BANKS = 1,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter RD_QUEUE_SIZE = 1,
|
||||
parameter BANK_INTERLEAVE= 0,
|
||||
parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS),
|
||||
parameter REQ_OUT_BUF = 0,
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
|
@ -45,7 +47,7 @@ module VX_avs_adapter #(
|
|||
// AVS bus
|
||||
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
|
||||
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
|
||||
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
|
||||
input wire avs_waitrequest [NUM_BANKS],
|
||||
output wire avs_write [NUM_BANKS],
|
||||
output wire avs_read [NUM_BANKS],
|
||||
|
@ -53,28 +55,35 @@ module VX_avs_adapter #(
|
|||
output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
|
||||
input wire avs_readdatavalid [NUM_BANKS]
|
||||
);
|
||||
localparam DATA_SIZE = DATA_WIDTH/8;
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS;
|
||||
localparam DATA_SIZE = DATA_WIDTH/8;
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
|
||||
|
||||
// The AVS address port must be able to hold the full per-bank offset; report both widths on failure.
`STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid address width: current=%0d, expected=%0d", AVS_ADDR_WIDTH, BANK_OFFSETW))
|
||||
|
||||
// Requests handling //////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
|
||||
wire [NUM_BANKS-1:0] req_queue_going_full;
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
|
||||
wire [BANK_OFFSETW-1:0] req_bank_off;
|
||||
wire [NUM_BANKS-1:0] bank_req_ready;
|
||||
|
||||
if (NUM_BANKS > 1) begin : g_bank_sel
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin : g_bank_sel
|
||||
if (BANK_INTERLEAVE) begin : g_interleave
|
||||
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
|
||||
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
|
||||
end else begin : g_no_interleave
|
||||
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
|
||||
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
|
||||
end
|
||||
end else begin : g_no_bank_sel
|
||||
assign req_bank_sel = '0;
|
||||
assign req_bank_off = mem_req_addr;
|
||||
end
|
||||
|
||||
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
|
||||
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
|
||||
end
|
||||
|
@ -142,7 +151,7 @@ module VX_avs_adapter #(
|
|||
|
||||
assign avs_read[i] = valid_out && ~rw_out;
|
||||
assign avs_write[i] = valid_out && rw_out;
|
||||
assign avs_address[i] = ADDR_WIDTH'(addr_out);
|
||||
assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out);
|
||||
assign avs_byteenable[i] = byteen_out;
|
||||
assign avs_writedata[i] = data_out;
|
||||
assign avs_burstcount[i] = BURST_WIDTH'(1);
|
||||
|
|
|
@ -19,7 +19,8 @@ module VX_axi_adapter #(
|
|||
parameter ADDR_WIDTH = 32,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
|
||||
parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
|
||||
parameter BANK_INTERLEAVE= 0,
|
||||
parameter RSP_OUT_BUF = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -29,7 +30,7 @@ module VX_axi_adapter #(
|
|||
input wire mem_req_valid,
|
||||
input wire mem_req_rw,
|
||||
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
|
||||
input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
input wire [ADDR_WIDTH-1:0] mem_req_addr,
|
||||
input wire [DATA_WIDTH-1:0] mem_req_data,
|
||||
input wire [TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_ready,
|
||||
|
@ -43,7 +44,7 @@ module VX_axi_adapter #(
|
|||
// AXI write request address channel
|
||||
output wire m_axi_awvalid [NUM_BANKS],
|
||||
input wire m_axi_awready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_awlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_awsize [NUM_BANKS],
|
||||
|
@ -70,7 +71,7 @@ module VX_axi_adapter #(
|
|||
// AXI read address channel
|
||||
output wire m_axi_arvalid [NUM_BANKS],
|
||||
input wire m_axi_arready [NUM_BANKS],
|
||||
output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
|
||||
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
|
||||
output wire [7:0] m_axi_arlen [NUM_BANKS],
|
||||
output wire [2:0] m_axi_arsize [NUM_BANKS],
|
||||
|
@ -89,15 +90,28 @@ module VX_axi_adapter #(
|
|||
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
|
||||
input wire [1:0] m_axi_rresp [NUM_BANKS]
|
||||
);
|
||||
localparam AXSIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
|
||||
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
|
||||
localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
|
||||
// Per-bank byte-address width: in-bank word offset bits plus byte-in-word bits.
localparam DST_ADDR_WIDTH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8);
|
||||
|
||||
wire [BANK_ADDRW-1:0] req_bank_sel;
|
||||
if (NUM_BANKS > 1) begin : g_req_bank_sel
|
||||
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
|
||||
end else begin : g_req_bank_sel_0
|
||||
// The AXI address port must be wide enough for the per-bank byte address.
`STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WIDTH), ("invalid address width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WIDTH))
|
||||
|
||||
wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
|
||||
wire [BANK_OFFSETW-1:0] req_bank_off;
|
||||
|
||||
if (NUM_BANKS > 1) begin : g_bank_sel
|
||||
if (BANK_INTERLEAVE) begin : g_interleave
|
||||
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
|
||||
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
|
||||
end else begin : g_no_interleave
|
||||
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
|
||||
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
|
||||
end
|
||||
end else begin : g_no_bank_sel
|
||||
assign req_bank_sel = '0;
|
||||
assign req_bank_off = mem_req_addr;
|
||||
end
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
|
@ -134,10 +148,10 @@ module VX_axi_adapter #(
|
|||
// AXI write request address channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
|
||||
assign m_axi_awid[i] = mem_req_tag;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
assign m_axi_awsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_awsize[i] = 3'(DATA_SIZE);
|
||||
assign m_axi_awburst[i] = 2'b00;
|
||||
assign m_axi_awlock[i] = 2'b00;
|
||||
assign m_axi_awcache[i] = 4'b0000;
|
||||
|
@ -166,10 +180,10 @@ module VX_axi_adapter #(
|
|||
// AXI read request channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE;
|
||||
assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
|
||||
assign m_axi_arid[i] = mem_req_tag;
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
assign m_axi_arsize[i] = 3'(AXSIZE);
|
||||
assign m_axi_arsize[i] = 3'(DATA_SIZE);
|
||||
assign m_axi_arburst[i] = 2'b00;
|
||||
assign m_axi_arlock[i] = 2'b00;
|
||||
assign m_axi_arcache[i] = 4'b0000;
|
||||
|
|
|
@ -53,8 +53,6 @@ module VX_mem_adapter #(
|
|||
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
|
||||
output wire mem_rsp_ready_out
|
||||
);
|
||||
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
|
||||
|
||||
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
|
||||
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
|
||||
localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH);
|
||||
|
@ -74,6 +72,7 @@ module VX_mem_adapter #(
|
|||
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
|
||||
wire mem_rsp_ready_in_w;
|
||||
|
||||
`UNUSED_VAR (mem_req_tag_in)
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
|
||||
if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data
|
||||
|
@ -122,7 +121,7 @@ module VX_mem_adapter #(
|
|||
|
||||
assign mem_rsp_valid_in_w = mem_rsp_valid_out;
|
||||
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
|
||||
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
|
||||
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]);
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in_w;
|
||||
|
||||
end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data
|
||||
|
|
|
@ -7,17 +7,21 @@ include ../../common.mk
|
|||
# AFU parameters
|
||||
CONFIGS += -DNOPAE
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
#CONFIGS += -DNUM_CORES=2
|
||||
|
|
|
@ -98,7 +98,7 @@ ifdef PERF
|
|||
endif
|
||||
|
||||
# ast dump flags
|
||||
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
|
||||
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
|
||||
|
||||
all: swconfig ip-gen setup build
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ typedef void* vx_buffer_h;
|
|||
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
|
||||
#define VX_CAPS_ISA_FLAGS 0x7
|
||||
#define VX_CAPS_NUM_MEM_BANKS 0x8
|
||||
#define VX_CAPS_MEM_BANK_SIZE 0x9
|
||||
|
||||
// device isa flags
|
||||
#define VX_ISA_STD_A (1ull << ISA_STD_A)
|
||||
|
|
|
@ -163,11 +163,6 @@ public:
|
|||
});
|
||||
|
||||
{
|
||||
// retrieve FPGA global memory size
|
||||
CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), {
|
||||
global_mem_size_ = GLOBAL_MEM_SIZE;
|
||||
});
|
||||
|
||||
// Load ISA CAPS
|
||||
CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), {
|
||||
api_.fpgaClose(fpga_);
|
||||
|
@ -179,6 +174,12 @@ public:
|
|||
api_.fpgaClose(fpga_);
|
||||
return -1;
|
||||
});
|
||||
|
||||
// Determine global memory size
|
||||
uint64_t num_banks, bank_size;
|
||||
this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
|
||||
this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
|
||||
global_mem_size_ = num_banks * bank_size;
|
||||
}
|
||||
|
||||
#ifdef SCOPE
|
||||
|
@ -231,7 +232,10 @@ public:
|
|||
_value = isa_caps_;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = 1 << ((dev_caps_ >> 48) & 0x7);
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||
|
|
|
@ -80,6 +80,9 @@ public:
|
|||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -84,6 +84,9 @@ public:
|
|||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -8,6 +8,7 @@ SRC_DIR := $(VORTEX_HOME)/runtime/xrt
|
|||
|
||||
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
|
|
@ -49,7 +49,6 @@ using namespace vortex;
|
|||
#define MMIO_ISA_ADDR 0x1C
|
||||
#define MMIO_DCR_ADDR 0x28
|
||||
#define MMIO_SCP_ADDR 0x34
|
||||
#define MMIO_MEM_ADDR 0x40
|
||||
|
||||
#define CTL_AP_START (1 << 0)
|
||||
#define CTL_AP_DONE (1 << 1)
|
||||
|
@ -58,24 +57,6 @@ using namespace vortex;
|
|||
#define CTL_AP_RESET (1 << 4)
|
||||
#define CTL_AP_RESTART (1 << 7)
|
||||
|
||||
// Static description of one supported platform's device-memory layout.
struct platform_info_t {
|
||||
// Device-name prefix; get_platform_info() selects the entry whose prefix starts the device name.
const char *prefix_name;
|
||||
// log2 of the number of memory banks (the driver computes 1 << lg2_num_banks).
uint8_t lg2_num_banks;
|
||||
// log2 of the size of one bank in bytes (the driver computes 1ull << lg2_bank_size).
uint8_t lg2_bank_size;
|
||||
// Base address added to (bank index * bank size) when programming the per-bank address registers.
uint64_t mem_base;
|
||||
};
|
||||
|
||||
// Table of known platforms: {name prefix, log2(bank count), log2(bank size in bytes), memory base}.
// The trailing comment on each row spells out banks x bank size = total capacity.
static const platform_info_t g_platforms[] = {
|
||||
{"vortex_xrtsim", 0, 32, 0x0}, // 1 x 4 GB = 4 GB
|
||||
{"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
|
||||
{"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
|
||||
{"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
|
||||
{"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
|
||||
{"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2
|
||||
{"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4
|
||||
{"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4
|
||||
};
|
||||
|
||||
#ifdef CPP_API
|
||||
|
||||
typedef xrt::device xrt_device_t;
|
||||
|
@ -113,18 +94,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) {
|
|||
}
|
||||
#endif
|
||||
|
||||
static int get_platform_info(const std::string &device_name,
|
||||
platform_info_t *platform_info) {
|
||||
for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) {
|
||||
auto &platform = g_platforms[i];
|
||||
if (device_name.rfind(platform.prefix_name, 0) == 0) {
|
||||
*platform_info = platform;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
|
@ -181,58 +150,6 @@ public:
|
|||
auto xclbin = xrt::xclbin(xlbin_path_s);
|
||||
auto device_name = xrtDevice.get_info<xrt::info::device::name>();
|
||||
|
||||
/*{
|
||||
uint32_t num_banks = 0;
|
||||
uint64_t bank_size = 0;
|
||||
uint64_t mem_base = 0;
|
||||
|
||||
auto mem_json =
|
||||
nlohmann::json::parse(xrtDevice.get_info<xrt::info::device::memory>()); if
|
||||
(!mem_json.is_null()) { uint32_t index = 0; for (auto& mem :
|
||||
mem_json["board"]["memory"]["memories"]) { auto enabled =
|
||||
mem["enabled"].get<std::string>(); if (enabled == "true") { if (index == 0)
|
||||
{ mem_base = std::stoull(mem["base_address"].get<std::string>(), nullptr,
|
||||
16); bank_size = std::stoull(mem["range_bytes"].get<std::string>(), nullptr,
|
||||
16);
|
||||
}
|
||||
++index;
|
||||
}
|
||||
}
|
||||
num_banks = index;
|
||||
}
|
||||
|
||||
fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx,
|
||||
count=%d\n", mem_base, bank_size, num_banks);
|
||||
}*/
|
||||
|
||||
/*{
|
||||
std::cout << "Device" << device_index << " : " <<
|
||||
xrtDevice.get_info<xrt::info::device::name>() << std::endl; std::cout << "
|
||||
bdf : " << xrtDevice.get_info<xrt::info::device::bdf>() << std::endl;
|
||||
std::cout << " kdma : " <<
|
||||
xrtDevice.get_info<xrt::info::device::kdma>() << std::endl; std::cout << "
|
||||
max_freq : " <<
|
||||
xrtDevice.get_info<xrt::info::device::max_clock_frequency_mhz>() <<
|
||||
std::endl; std::cout << " memory : " <<
|
||||
xrtDevice.get_info<xrt::info::device::memory>() << std::endl; std::cout << "
|
||||
thermal : " << xrtDevice.get_info<xrt::info::device::thermal>() <<
|
||||
std::endl; std::cout << " m2m : " << std::boolalpha <<
|
||||
xrtDevice.get_info<xrt::info::device::m2m>() << std::dec << std::endl;
|
||||
std::cout << " nodma : " << std::boolalpha <<
|
||||
xrtDevice.get_info<xrt::info::device::nodma>() << std::dec << std::endl;
|
||||
|
||||
std::cout << "Memory info :" << std::endl;
|
||||
for (const auto& mem_bank : xclbin.get_mems()) {
|
||||
std::cout << " index : " << mem_bank.get_index() << std::endl;
|
||||
std::cout << " tag : " << mem_bank.get_tag() << std::endl;
|
||||
std::cout << " type : " << (int)mem_bank.get_type() << std::endl;
|
||||
std::cout << " base_address : 0x" << std::hex <<
|
||||
mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" <<
|
||||
(mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << "
|
||||
used :" << mem_bank.get_used() << std::endl;
|
||||
}
|
||||
}*/
|
||||
|
||||
#else
|
||||
|
||||
CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), {
|
||||
|
@ -275,11 +192,6 @@ public:
|
|||
|
||||
printf("info: device name=%s.\n", device_name.c_str());
|
||||
|
||||
CHECK_ERR(get_platform_info(device_name, &platform_), {
|
||||
fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str());
|
||||
return err;
|
||||
});
|
||||
|
||||
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), {
|
||||
return err;
|
||||
});
|
||||
|
@ -300,36 +212,13 @@ public:
|
|||
return err;
|
||||
});
|
||||
|
||||
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
||||
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
||||
uint64_t num_banks;
|
||||
this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
|
||||
lg2_num_banks_ = log2ceil(num_banks);
|
||||
|
||||
// adjust memory banks allocation to architecture limit
|
||||
int isa_arch = VX_ISA_ARCH(isa_caps_);
|
||||
if (isa_arch == 32) {
|
||||
uint64_t max_mem_size = 1ull << 32;
|
||||
uint32_t need_num_banks = max_mem_size / bank_size;
|
||||
if (num_banks > need_num_banks) {
|
||||
printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks);
|
||||
num_banks = need_num_banks;
|
||||
platform_.lg2_num_banks = log2ceil(num_banks);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_banks; ++i) {
|
||||
uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12);
|
||||
uint64_t reg_value = platform_.mem_base + i * bank_size;
|
||||
|
||||
CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), {
|
||||
return err;
|
||||
});
|
||||
|
||||
CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), {
|
||||
return err;
|
||||
});
|
||||
#ifndef BANK_INTERLEAVE
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
uint64_t bank_size;
|
||||
this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
|
||||
lg2_bank_size_ = log2ceil(bank_size);
|
||||
|
||||
global_mem_size_ = num_banks * bank_size;
|
||||
|
||||
|
@ -418,7 +307,10 @@ public:
|
|||
_value = isa_caps_;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = 1 << ((dev_caps_ >> 48) & 0x7);
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||
|
@ -734,23 +626,23 @@ private:
|
|||
MemoryAllocator global_mem_;
|
||||
xrt_device_t xrtDevice_;
|
||||
xrt_kernel_t xrtKernel_;
|
||||
platform_info_t platform_;
|
||||
uint64_t dev_caps_;
|
||||
uint64_t isa_caps_;
|
||||
uint64_t global_mem_size_;
|
||||
DeviceConfig dcrs_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
uint32_t lg2_num_banks_;
|
||||
uint32_t lg2_bank_size_;
|
||||
|
||||
#ifdef BANK_INTERLEAVE
|
||||
|
||||
std::vector<xrt_buffer_t> xrtBuffers_;
|
||||
|
||||
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
|
||||
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
||||
uint32_t num_banks = 1 << lg2_num_banks_;
|
||||
uint64_t block_addr = addr / CACHE_BLOCK_SIZE;
|
||||
uint32_t index = block_addr & (num_banks - 1);
|
||||
uint64_t offset =
|
||||
(block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE;
|
||||
uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE;
|
||||
if (pIdx) {
|
||||
*pIdx = index;
|
||||
}
|
||||
|
@ -778,9 +670,9 @@ private:
|
|||
std::unordered_map<uint32_t, buf_cnt_t> xrtBuffers_;
|
||||
|
||||
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
|
||||
uint32_t num_banks = 1 << platform_.lg2_num_banks;
|
||||
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
||||
uint32_t index = addr >> platform_.lg2_bank_size;
|
||||
uint32_t num_banks = 1 << lg2_num_banks_;
|
||||
uint64_t bank_size = 1ull << lg2_bank_size_;
|
||||
uint32_t index = addr >> lg2_bank_size_;
|
||||
uint64_t offset = addr & (bank_size - 1);
|
||||
if (index > num_banks) {
|
||||
fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr);
|
||||
|
@ -807,7 +699,7 @@ private:
|
|||
}
|
||||
} else {
|
||||
printf("allocating bank%d...\n", bank_id);
|
||||
uint64_t bank_size = 1ull << platform_.lg2_bank_size;
|
||||
uint64_t bank_size = 1ull << lg2_bank_size_;
|
||||
#ifdef CPP_API
|
||||
xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id);
|
||||
#else
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,30 +20,58 @@ constexpr uint32_t count_leading_zeros(uint32_t value) {
|
|||
return value ? __builtin_clz(value) : 32;
|
||||
}
|
||||
|
||||
// Number of leading zero bits in a 64-bit value; yields 64 for a zero input
// (zero is handled here so the builtin is never invoked with it).
constexpr uint32_t count_leading_zeros(uint64_t value) {
  return (value == 0) ? 64 : __builtin_clzll(value);
}
|
||||
|
||||
// Number of trailing zero bits in a 32-bit value; yields 32 for a zero input
// (zero is handled here so the builtin is never invoked with it).
constexpr uint32_t count_trailing_zeros(uint32_t value) {
  return (value == 0) ? 32 : __builtin_ctz(value);
}
|
||||
|
||||
// Number of trailing zero bits in a 64-bit value; yields 64 for a zero input
// (zero is handled here so the builtin is never invoked with it).
constexpr uint32_t count_trailing_zeros(uint64_t value) {
  return (value == 0) ? 64 : __builtin_ctzll(value);
}
|
||||
|
||||
// True when exactly one bit of the 32-bit value is set; zero is not a power of two.
constexpr bool ispow2(uint32_t value) {
  // clearing the lowest set bit leaves nothing behind only for single-bit values
  return (value != 0) && ((value & (value - 1)) == 0);
}
|
||||
|
||||
// True when exactly one bit of the 64-bit value is set; zero is not a power of two.
constexpr bool ispow2(uint64_t value) {
  // clearing the lowest set bit leaves nothing behind only for single-bit values
  return (value != 0) && ((value & (value - 1)) == 0);
}
|
||||
|
||||
constexpr uint32_t log2ceil(uint32_t value) {
|
||||
return 32 - count_leading_zeros(value - 1);
|
||||
}
|
||||
|
||||
constexpr uint32_t log2ceil(uint64_t value) {
|
||||
return 64 - count_leading_zeros(value - 1);
|
||||
}
|
||||
|
||||
inline unsigned log2up(uint32_t value) {
|
||||
return std::max<uint32_t>(1, log2ceil(value));
|
||||
}
|
||||
|
||||
inline unsigned log2up(uint64_t value) {
|
||||
return std::max<uint32_t>(1, log2ceil(value));
|
||||
}
|
||||
|
||||
constexpr unsigned log2floor(uint32_t value) {
|
||||
return 31 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
constexpr unsigned log2floor(uint64_t value) {
|
||||
return 63 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
constexpr unsigned ceil2(uint32_t value) {
|
||||
return 32 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
constexpr unsigned ceil2(uint64_t value) {
|
||||
return 64 - count_leading_zeros(value);
|
||||
}
|
||||
|
||||
inline uint64_t bit_clr(uint64_t bits, uint32_t index) {
|
||||
assert(index <= 63);
|
||||
return bits & ~(1ull << index);
|
||||
|
@ -86,7 +114,7 @@ template <typename T = uint32_t>
|
|||
T sext(const T& word, uint32_t width) {
|
||||
assert(width > 1);
|
||||
assert(width <= (sizeof(T) * 8));
|
||||
if (width == (sizeof(T) * 8))
|
||||
if (width == (sizeof(T) * 8))
|
||||
return word;
|
||||
T mask((static_cast<T>(1) << width) - 1);
|
||||
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : (word & mask);
|
||||
|
@ -96,7 +124,7 @@ template <typename T = uint32_t>
|
|||
T zext(const T& word, uint32_t width) {
|
||||
assert(width > 1);
|
||||
assert(width <= (sizeof(T) * 8));
|
||||
if (width == (sizeof(T) * 8))
|
||||
if (width == (sizeof(T) * 8))
|
||||
return word;
|
||||
T mask((static_cast<T>(1) << width) - 1);
|
||||
return word & mask;
|
||||
|
|
|
@ -71,13 +71,14 @@ public:
|
|||
|
||||
// Check if the reservation is within memory capacity bounds
|
||||
if (addr + size > capacity_) {
|
||||
printf("error: address range out of bounds\n");
|
||||
printf("error: address range out of bounds - requested=0x%lx, capacity=0x%lx\n", (addr + size), capacity_);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Ensure the reservation does not overlap with existing pages
|
||||
if (hasPageOverlap(addr, size)) {
|
||||
printf("error: address range overlaps with existing allocation\n");
|
||||
uint64_t overlapStart, overlapEnd;
|
||||
if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) {
|
||||
printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -509,15 +510,15 @@ private:
|
|||
return false;
|
||||
}
|
||||
|
||||
bool hasPageOverlap(uint64_t start, uint64_t size) {
|
||||
bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) {
|
||||
page_t* current = pages_;
|
||||
while (current != nullptr) {
|
||||
uint64_t pageStart = current->addr;
|
||||
uint64_t pageEnd = pageStart + current->size;
|
||||
uint64_t requestEnd = start + size;
|
||||
if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page
|
||||
(requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page
|
||||
(start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page
|
||||
uint64_t end = start + size;
|
||||
if ((start <= pageEnd) && (end >= pageStart)) {
|
||||
*overlapStart = pageStart;
|
||||
*overlapEnd = pageEnd;
|
||||
return true;
|
||||
}
|
||||
current = current->next;
|
||||
|
|
|
@ -32,18 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
|||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
|
||||
# AFU parameters
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8)
|
||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
|
@ -145,6 +145,9 @@ public:
|
|||
// allocate RAM
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// calculate memory bank size
|
||||
mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
@ -406,14 +409,14 @@ private:
|
|||
}
|
||||
|
||||
void avs_bus_reset() {
|
||||
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
device_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void avs_bus_eval() {
|
||||
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
// process memory responses
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
|
@ -421,7 +424,7 @@ private:
|
|||
auto mem_rd_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_req = *mem_rd_it;
|
||||
device_->avs_readdatavalid[b] = 1;
|
||||
memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
|
||||
memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE);
|
||||
uint32_t addr = mem_req->addr;
|
||||
pending_mem_reqs_[b].erase(mem_rd_it);
|
||||
delete mem_req;
|
||||
|
@ -429,19 +432,20 @@ private:
|
|||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE;
|
||||
uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
if (device_->avs_write[b]) {
|
||||
// process write request
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
|
||||
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr);
|
||||
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) {
|
||||
printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]);
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen);
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", data[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
|
@ -455,22 +459,20 @@ private:
|
|||
dram_queue_.push(mem_req);
|
||||
} else
|
||||
if (device_->avs_read[b]) {
|
||||
// process read request
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
mem_req->bank_id = b;
|
||||
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
|
||||
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
|
||||
for (auto& req : pending_mem_reqs_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr);
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_req->data[i]);
|
||||
}
|
||||
printf("}\n");*/
|
||||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
|
@ -481,7 +483,7 @@ private:
|
|||
}
|
||||
|
||||
typedef struct {
|
||||
std::array<uint8_t, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE> data;
|
||||
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
|
||||
uint32_t addr;
|
||||
uint32_t bank_id;
|
||||
bool write;
|
||||
|
@ -514,9 +516,10 @@ private:
|
|||
bool stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
uint64_t host_buffer_ids_;
|
||||
uint64_t mem_bank_size_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
|
|
@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
|||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_MEMORY_BANKS]
|
||||
);
|
||||
|
||||
t_if_ccip_Rx cp2af_sRxPort;
|
||||
t_if_ccip_Tx af2cp_sTxPort;
|
||||
|
||||
vortex_afu #(
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS)
|
||||
) afu (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
|
|
@ -32,14 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
|||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
|
||||
# AFU parameters
|
||||
ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DM_AXI_MEM_NUM_BANKS=1
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_OFFSET=0
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
|
|
@ -11,22 +11,22 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "vortex_afu.vh"
|
||||
|
||||
module vortex_afu_shim #(
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -50,35 +50,38 @@ module vortex_afu_shim #(
|
|||
output wire interrupt
|
||||
`IGNORE_WARNINGS_END
|
||||
);
|
||||
vortex_afu #(
|
||||
.C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH),
|
||||
.C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH)
|
||||
) afu (
|
||||
.ap_clk(ap_clk),
|
||||
.ap_rst_n(ap_rst_n),
|
||||
// AXI4 master interface
|
||||
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
.s_axi_ctrl_awvalid(s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready(s_axi_ctrl_awready),
|
||||
.s_axi_ctrl_awaddr(s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_wvalid(s_axi_ctrl_wvalid),
|
||||
.s_axi_ctrl_wready(s_axi_ctrl_wready),
|
||||
.s_axi_ctrl_wdata(s_axi_ctrl_wdata),
|
||||
.s_axi_ctrl_wstrb(s_axi_ctrl_wstrb),
|
||||
.s_axi_ctrl_arvalid(s_axi_ctrl_arvalid),
|
||||
.s_axi_ctrl_arready(s_axi_ctrl_arready),
|
||||
.s_axi_ctrl_araddr(s_axi_ctrl_araddr),
|
||||
.s_axi_ctrl_rvalid(s_axi_ctrl_rvalid),
|
||||
.s_axi_ctrl_rready(s_axi_ctrl_rready),
|
||||
.s_axi_ctrl_rdata(s_axi_ctrl_rdata),
|
||||
.s_axi_ctrl_rresp(s_axi_ctrl_rresp),
|
||||
.s_axi_ctrl_bvalid(s_axi_ctrl_bvalid),
|
||||
.s_axi_ctrl_bready(s_axi_ctrl_bready),
|
||||
.s_axi_ctrl_bresp(s_axi_ctrl_bresp),
|
||||
.interrupt(interrupt)
|
||||
);
|
||||
VX_afu_wrap #(
|
||||
.C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
|
||||
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
|
||||
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
|
||||
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
|
||||
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
|
||||
) afu_wrap (
|
||||
.clk (ap_clk),
|
||||
.reset (~ap_rst_n),
|
||||
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
|
||||
.s_axi_ctrl_wvalid (s_axi_ctrl_wvalid),
|
||||
.s_axi_ctrl_wready (s_axi_ctrl_wready),
|
||||
.s_axi_ctrl_wdata (s_axi_ctrl_wdata),
|
||||
.s_axi_ctrl_wstrb (s_axi_ctrl_wstrb),
|
||||
.s_axi_ctrl_arvalid (s_axi_ctrl_arvalid),
|
||||
.s_axi_ctrl_arready (s_axi_ctrl_arready),
|
||||
.s_axi_ctrl_araddr (s_axi_ctrl_araddr),
|
||||
.s_axi_ctrl_rvalid (s_axi_ctrl_rvalid),
|
||||
.s_axi_ctrl_rready (s_axi_ctrl_rready),
|
||||
.s_axi_ctrl_rdata (s_axi_ctrl_rdata),
|
||||
.s_axi_ctrl_rresp (s_axi_ctrl_rresp),
|
||||
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
|
||||
.s_axi_ctrl_bready (s_axi_ctrl_bready),
|
||||
.s_axi_ctrl_bresp (s_axi_ctrl_bresp),
|
||||
|
||||
.interrupt (interrupt)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
|
||||
#include <iostream>
|
||||
|
||||
#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8)
|
||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
|
@ -59,10 +59,24 @@
|
|||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH)
|
||||
|
||||
#define CPU_GPU_LATENCY 200
|
||||
|
||||
#if PLATFORM_MEMORY_ADDR_WIDTH > 32
|
||||
typedef QData Vl_m_addr_t;
|
||||
#else
|
||||
typedef IData Vl_m_addr_t;
|
||||
#endif
|
||||
|
||||
#if PLATFORM_MEMORY_DATA_WIDTH > 64
|
||||
typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t;
|
||||
#else
|
||||
#if PLATFORM_MEMORY_DATA_WIDTH > 32
|
||||
typedef QData Vl_m_data_t;
|
||||
#else
|
||||
typedef IData Vl_m_data_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
@ -134,7 +148,7 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
delete mem_alloc_[i];
|
||||
}
|
||||
if (ram_) {
|
||||
|
@ -169,15 +183,18 @@ public:
|
|||
tfp_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
// calculate memory bank size
|
||||
mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
|
||||
// allocate RAM
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// initialize AXI memory interfaces
|
||||
MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS);
|
||||
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
|
||||
|
||||
// initialize memory allocator
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64);
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
}
|
||||
|
||||
// reset the device
|
||||
|
@ -198,13 +215,13 @@ public:
|
|||
}
|
||||
|
||||
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
|
||||
if (bank_id >= M_AXI_MEM_NUM_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
return -1;
|
||||
return mem_alloc_[bank_id]->allocate(size, addr);
|
||||
}
|
||||
|
||||
int mem_free(uint32_t bank_id, uint64_t addr) {
|
||||
if (bank_id >= M_AXI_MEM_NUM_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
return -1;
|
||||
return mem_alloc_[bank_id]->release(addr);
|
||||
}
|
||||
|
@ -212,11 +229,11 @@ public:
|
|||
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
if (bank_id >= M_AXI_MEM_NUM_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
return -1;
|
||||
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
|
||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||
ram_->write(data, base_addr, size);
|
||||
/*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
|
||||
/*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
|
||||
for (int i = size-1; i >= 0; --i) {
|
||||
printf("%02x", ((const uint8_t*)data)[i]);
|
||||
}
|
||||
|
@ -227,11 +244,11 @@ public:
|
|||
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
if (bank_id >= M_AXI_MEM_NUM_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
return -1;
|
||||
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
|
||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||
ram_->read(data, base_addr, size);
|
||||
/*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
|
||||
/*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
|
||||
for (int i = size-1; i >= 0; --i) {
|
||||
printf("%02x", ((uint8_t*)data)[i]);
|
||||
}
|
||||
|
@ -307,7 +324,7 @@ private:
|
|||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queues_[i], empty);
|
||||
}
|
||||
|
@ -334,7 +351,7 @@ private:
|
|||
void tick() {
|
||||
this->axi_mem_bus_eval();
|
||||
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
if (!dram_queues_[i].empty()) {
|
||||
auto mem_req = dram_queues_[i].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
|
||||
|
@ -394,7 +411,7 @@ private:
|
|||
}
|
||||
|
||||
void axi_mem_bus_reset() {
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
// address read request
|
||||
*m_axi_mem_[i].arready = 1;
|
||||
|
||||
|
@ -418,7 +435,7 @@ private:
|
|||
}
|
||||
|
||||
void axi_mem_bus_eval() {
|
||||
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
// handle read responses
|
||||
if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) {
|
||||
*m_axi_mem_[i].rvalid = 0;
|
||||
|
@ -434,7 +451,7 @@ private:
|
|||
*m_axi_mem_[i].rid = mem_rsp->tag;
|
||||
*m_axi_mem_[i].rresp = 0;
|
||||
*m_axi_mem_[i].rlast = 1;
|
||||
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE);
|
||||
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
|
||||
pending_mem_reqs_[i].erase(mem_rsp_it);
|
||||
m_axi_states_[i].read_rsp_pending = true;
|
||||
delete mem_rsp;
|
||||
|
@ -465,14 +482,14 @@ private:
|
|||
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = *m_axi_mem_[i].arid;
|
||||
mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
|
||||
ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE);
|
||||
mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[i].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
|
||||
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_req->data[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
@ -494,9 +511,9 @@ private:
|
|||
|
||||
auto byteen = *m_axi_mem_[i].wstrb;
|
||||
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
|
||||
auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
|
||||
auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE;
|
||||
|
||||
for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
|
@ -510,7 +527,7 @@ private:
|
|||
pending_mem_reqs_[i].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
|
||||
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", data[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
@ -535,7 +552,7 @@ private:
|
|||
} m_axi_state_t;
|
||||
|
||||
typedef struct {
|
||||
std::array<uint8_t, M_AXI_MEM_DATA_SIZE> data;
|
||||
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
|
||||
uint32_t tag;
|
||||
uint64_t addr;
|
||||
bool write;
|
||||
|
@ -545,22 +562,22 @@ private:
|
|||
typedef struct {
|
||||
CData* awvalid;
|
||||
CData* awready;
|
||||
QData* awaddr;
|
||||
Vl_m_addr_t* awaddr;
|
||||
IData* awid;
|
||||
CData* awlen;
|
||||
CData* wvalid;
|
||||
CData* wready;
|
||||
VlWide<16>* wdata;
|
||||
Vl_m_data_t* wdata;
|
||||
QData* wstrb;
|
||||
CData* wlast;
|
||||
CData* arvalid;
|
||||
CData* arready;
|
||||
QData* araddr;
|
||||
Vl_m_addr_t* araddr;
|
||||
IData* arid;
|
||||
CData* arlen;
|
||||
CData* rvalid;
|
||||
CData* rready;
|
||||
VlWide<16>* rdata;
|
||||
Vl_m_data_t* rdata;
|
||||
CData* rlast;
|
||||
IData* rid;
|
||||
CData* rresp;
|
||||
|
@ -573,21 +590,22 @@ private:
|
|||
Vvortex_afu_shim* device_;
|
||||
RAM* ram_;
|
||||
DramSim dram_sim_;
|
||||
uint64_t mem_bank_size_;
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_[M_AXI_MEM_NUM_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS];
|
||||
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS];
|
||||
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS];
|
||||
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
std::queue<mem_req_t*> dram_queues_[M_AXI_MEM_NUM_BANKS];
|
||||
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC* tfp_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue