opaesim and xrtsim multi-bank memory support

This commit is contained in:
Blaise Tine 2024-09-22 03:54:40 -07:00
parent 00feb8b424
commit b8199decf4
27 changed files with 488 additions and 464 deletions

View file

@ -273,10 +273,11 @@ config2()
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
# test single-bank DRAM # test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
# test 27-bit DRAM address # test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress
echo "configuration-2 tests done!" echo "configuration-2 tests done!"
} }

View file

@ -15,7 +15,7 @@
module Vortex_axi import VX_gpu_pkg::*; #( module Vortex_axi import VX_gpu_pkg::*; #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8),
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
parameter AXI_NUM_BANKS = 1 parameter AXI_NUM_BANKS = 1
)( )(
@ -83,10 +83,9 @@ module Vortex_axi import VX_gpu_pkg::*; #(
output wire busy output wire busy
); );
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH);
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
wire mem_req_valid; wire mem_req_valid;
wire mem_req_rw; wire mem_req_rw;
@ -101,94 +100,6 @@ module Vortex_axi import VX_gpu_pkg::*; #(
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready; wire mem_rsp_ready;
wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]);
assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]);
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
end
VX_axi_adapter #(
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.ADDR_WIDTH (`MEM_ADDR_WIDTH),
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awready (m_axi_awready),
.m_axi_awaddr (m_axi_awaddr_unqual),
.m_axi_awid (m_axi_awid_unqual),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_awlock (m_axi_awlock),
.m_axi_awcache (m_axi_awcache),
.m_axi_awprot (m_axi_awprot),
.m_axi_awqos (m_axi_awqos),
.m_axi_awregion (m_axi_awregion),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wready (m_axi_wready),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wlast (m_axi_wlast),
.m_axi_bvalid (m_axi_bvalid),
.m_axi_bready (m_axi_bready),
.m_axi_bid (m_axi_bid_unqual),
.m_axi_bresp (m_axi_bresp),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arready (m_axi_arready),
.m_axi_araddr (m_axi_araddr_unqual),
.m_axi_arid (m_axi_arid_unqual),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arlock (m_axi_arlock),
.m_axi_arcache (m_axi_arcache),
.m_axi_arprot (m_axi_arprot),
.m_axi_arqos (m_axi_arqos),
.m_axi_arregion (m_axi_arregion),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rready (m_axi_rready),
.m_axi_rdata (m_axi_rdata),
.m_axi_rlast (m_axi_rlast) ,
.m_axi_rid (m_axi_rid_unqual),
.m_axi_rresp (m_axi_rresp)
);
`SCOPE_IO_SWITCH (1) `SCOPE_IO_SWITCH (1)
Vortex vortex ( Vortex vortex (
@ -217,4 +128,127 @@ module Vortex_axi import VX_gpu_pkg::*; #(
.busy (busy) .busy (busy)
); );
wire mem_req_valid_a;
wire mem_req_rw_a;
wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a;
wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a;
wire [AXI_DATA_WIDTH-1:0] mem_req_data_a;
wire [AXI_TID_WIDTH-1:0] mem_req_tag_a;
wire mem_req_ready_a;
wire mem_rsp_valid_a;
wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a;
wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a;
wire mem_rsp_ready_a;
VX_mem_adapter #(
.SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.DST_DATA_WIDTH (AXI_DATA_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (AXI_TID_WIDTH),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid_in (mem_req_valid),
.mem_req_addr_in (mem_req_addr),
.mem_req_rw_in (mem_req_rw),
.mem_req_byteen_in (mem_req_byteen),
.mem_req_data_in (mem_req_data),
.mem_req_tag_in (mem_req_tag),
.mem_req_ready_in (mem_req_ready),
.mem_rsp_valid_in (mem_rsp_valid),
.mem_rsp_data_in (mem_rsp_data),
.mem_rsp_tag_in (mem_rsp_tag),
.mem_rsp_ready_in (mem_rsp_ready),
.mem_req_valid_out (mem_req_valid_a),
.mem_req_addr_out (mem_req_addr_a),
.mem_req_rw_out (mem_req_rw_a),
.mem_req_byteen_out (mem_req_byteen_a),
.mem_req_data_out (mem_req_data_a),
.mem_req_tag_out (mem_req_tag_a),
.mem_req_ready_out (mem_req_ready_a),
.mem_rsp_valid_out (mem_rsp_valid_a),
.mem_rsp_data_out (mem_rsp_data_a),
.mem_rsp_tag_out (mem_rsp_tag_a),
.mem_rsp_ready_out (mem_rsp_ready_a)
);
VX_axi_adapter #(
.DATA_WIDTH (AXI_DATA_WIDTH),
.ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH),
.TAG_WIDTH (AXI_TID_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.BANK_INTERLEAVE (0),
.RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid_a),
.mem_req_rw (mem_req_rw_a),
.mem_req_byteen (mem_req_byteen_a),
.mem_req_addr (mem_req_addr_a),
.mem_req_data (mem_req_data_a),
.mem_req_tag (mem_req_tag_a),
.mem_req_ready (mem_req_ready_a),
.mem_rsp_valid (mem_rsp_valid_a),
.mem_rsp_data (mem_rsp_data_a),
.mem_rsp_tag (mem_rsp_tag_a),
.mem_rsp_ready (mem_rsp_ready_a),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awready (m_axi_awready),
.m_axi_awaddr (m_axi_awaddr),
.m_axi_awid (m_axi_awid),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_awlock (m_axi_awlock),
.m_axi_awcache (m_axi_awcache),
.m_axi_awprot (m_axi_awprot),
.m_axi_awqos (m_axi_awqos),
.m_axi_awregion (m_axi_awregion),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wready (m_axi_wready),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wlast (m_axi_wlast),
.m_axi_bvalid (m_axi_bvalid),
.m_axi_bready (m_axi_bready),
.m_axi_bid (m_axi_bid),
.m_axi_bresp (m_axi_bresp),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arready (m_axi_arready),
.m_axi_araddr (m_axi_araddr),
.m_axi_arid (m_axi_arid),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arlock (m_axi_arlock),
.m_axi_arcache (m_axi_arcache),
.m_axi_arprot (m_axi_arprot),
.m_axi_arqos (m_axi_arqos),
.m_axi_arregion (m_axi_arregion),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rready (m_axi_rready),
.m_axi_rdata (m_axi_rdata),
.m_axi_rlast (m_axi_rlast),
.m_axi_rid (m_axi_rid),
.m_axi_rresp (m_axi_rresp)
);
endmodule endmodule

View file

@ -30,7 +30,17 @@
//`include "platform_afu_top_config.vh" //`include "platform_afu_top_config.vh"
`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY `ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
`endif
package local_mem_cfg_pkg; package local_mem_cfg_pkg;
@ -57,5 +67,3 @@ package local_mem_cfg_pkg;
typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask; typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask;
endpackage // local_mem_cfg_pkg endpackage // local_mem_cfg_pkg
`endif // PLATFORM_PROVIDES_LOCAL_MEMORY

View file

@ -42,7 +42,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
); );
localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data);
localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8;
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH));
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData);
@ -96,9 +96,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
localparam STATE_DCR_WRITE = 4; localparam STATE_DCR_WRITE = 4;
localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1); localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1);
localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + `CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8);
wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [127:0] afu_id = `AFU_ACCEL_UUID;
wire [63:0] dev_caps = {16'b0, wire [63:0] dev_caps = {8'b0,
5'(BANK_BYTE_ADDR_WIDTH-16),
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
16'(`NUM_CORES * `NUM_CLUSTERS), 16'(`NUM_CORES * `NUM_CLUSTERS),
8'(`NUM_WARPS), 8'(`NUM_WARPS),
@ -601,6 +605,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.NUM_BANKS (NUM_LOCAL_MEM_BANKS), .NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.TAG_WIDTH (AVS_REQ_TAGW + 1), .TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.AVS_ADDR_WIDTH($bits(t_local_mem_addr)),
.BANK_INTERLEAVE (1),
.REQ_OUT_BUF (2), .REQ_OUT_BUF (2),
.RSP_OUT_BUF (0) .RSP_OUT_BUF (0)
) avs_adapter ( ) avs_adapter (

View file

@ -14,21 +14,21 @@
`include "vortex_afu.vh" `include "vortex_afu.vh"
module VX_afu_ctrl #( module VX_afu_ctrl #(
parameter AXI_ADDR_WIDTH = 8, parameter S_AXI_ADDR_WIDTH = 8,
parameter AXI_DATA_WIDTH = 32, parameter S_AXI_DATA_WIDTH = 32,
parameter AXI_NUM_BANKS = 1 parameter M_AXI_ADDR_WIDTH = 25
) ( ) (
// axi4 lite slave signals // axi4 lite slave signals
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire s_axi_awvalid, input wire s_axi_awvalid,
input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
output wire s_axi_awready, output wire s_axi_awready,
input wire s_axi_wvalid, input wire s_axi_wvalid,
input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata, input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata,
input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb,
output wire s_axi_wready, output wire s_axi_wready,
output wire s_axi_bvalid, output wire s_axi_bvalid,
@ -36,11 +36,11 @@ module VX_afu_ctrl #(
input wire s_axi_bready, input wire s_axi_bready,
input wire s_axi_arvalid, input wire s_axi_arvalid,
input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr, input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr,
output wire s_axi_arready, output wire s_axi_arready,
output wire s_axi_rvalid, output wire s_axi_rvalid,
output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata, output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata,
output wire [1:0] s_axi_rresp, output wire [1:0] s_axi_rresp,
input wire s_axi_rready, input wire s_axi_rready,
@ -56,8 +56,6 @@ module VX_afu_ctrl #(
output wire scope_bus_out, output wire scope_bus_out,
`endif `endif
output wire [63:0] mem_base [AXI_NUM_BANKS],
output wire dcr_wr_valid, output wire dcr_wr_valid,
output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data
@ -125,10 +123,6 @@ module VX_afu_ctrl #(
//ADDR_SCP_CTRL = 8'h3C, //ADDR_SCP_CTRL = 8'h3C,
`endif `endif
ADDR_MEM_0 = 8'h40,
ADDR_MEM_1 = 8'h44,
//ADDR_MEM_CTRL = 8'h48,
ADDR_BITS = 8; ADDR_BITS = 8;
localparam localparam
@ -144,7 +138,9 @@ module VX_afu_ctrl #(
RSTATE_WIDTH = 2; RSTATE_WIDTH = 2;
// device caps // device caps
wire [63:0] dev_caps = {16'b0, wire [63:0] dev_caps = {8'b0,
5'(M_AXI_ADDR_WIDTH-16),
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
16'(`NUM_CORES * `NUM_CLUSTERS), 16'(`NUM_CORES * `NUM_CLUSTERS),
8'(`NUM_WARPS), 8'(`NUM_WARPS),
@ -174,7 +170,6 @@ module VX_afu_ctrl #(
reg gie_r; reg gie_r;
reg [1:0] ier_r; reg [1:0] ier_r;
reg [1:0] isr_r; reg [1:0] isr_r;
reg [63:0] mem_r [AXI_NUM_BANKS];
reg [31:0] dcra_r; reg [31:0] dcra_r;
reg [31:0] dcrv_r; reg [31:0] dcrv_r;
reg dcr_wr_valid_r; reg dcr_wr_valid_r;
@ -311,10 +306,6 @@ module VX_afu_ctrl #(
dcra_r <= '0; dcra_r <= '0;
dcrv_r <= '0; dcrv_r <= '0;
dcr_wr_valid_r <= 0; dcr_wr_valid_r <= 0;
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
mem_r[i] <= '0;
end
end else begin end else begin
dcr_wr_valid_r <= 0; dcr_wr_valid_r <= 0;
ap_reset_r <= 0; ap_reset_r <= 0;
@ -353,16 +344,7 @@ module VX_afu_ctrl #(
dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask); dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask);
dcr_wr_valid_r <= 1; dcr_wr_valid_r <= 1;
end end
default: begin default:;
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
end
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
end
end
end
endcase endcase
if (ier_r[0] & ap_done) if (ier_r[0] & ap_done)
@ -453,8 +435,6 @@ module VX_afu_ctrl #(
assign ap_start = ap_start_r; assign ap_start = ap_start_r;
assign interrupt = gie_r & (| isr_r); assign interrupt = gie_r & (| isr_r);
assign mem_base = mem_r;
assign dcr_wr_valid = dcr_wr_valid_r; assign dcr_wr_valid = dcr_wr_valid_r;
assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r); assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r);
assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r); assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r);

View file

@ -16,16 +16,17 @@
module VX_afu_wrap #( module VX_afu_wrap #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, parameter C_M_AXI_MEM_ID_WIDTH = 32,
parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = 25,
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH parameter C_M_AXI_MEM_DATA_WIDTH = 512,
parameter C_M_AXI_MEM_NUM_BANKS = 2
) ( ) (
// System signals // System signals
input wire clk, input wire clk,
input wire reset, input wire reset,
// AXI4 master interface // AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface // AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid, input wire s_axi_ctrl_awvalid,
@ -48,7 +49,6 @@ module VX_afu_wrap #(
output wire interrupt output wire interrupt
); );
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
localparam STATE_IDLE = 0; localparam STATE_IDLE = 0;
localparam STATE_RUN = 1; localparam STATE_RUN = 1;
@ -80,7 +80,7 @@ module VX_afu_wrap #(
wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS];
// convert memory interface to array // convert memory interface to array
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
reg [15:0] vx_pending_writes; reg [15:0] vx_pending_writes;
@ -88,8 +88,6 @@ module VX_afu_wrap #(
reg vx_reset = 1; // asserted at initialization reg vx_reset = 1; // asserted at initialization
wire vx_busy; wire vx_busy;
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
wire dcr_wr_valid; wire dcr_wr_valid;
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr; wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data; wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data;
@ -181,9 +179,9 @@ module VX_afu_wrap #(
end end
VX_afu_ctrl #( VX_afu_ctrl #(
.AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) .M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH)
) afu_ctrl ( ) afu_ctrl (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -218,26 +216,24 @@ module VX_afu_wrap #(
.scope_bus_out (scope_bus_in), .scope_bus_out (scope_bus_in),
`endif `endif
.mem_base (mem_base),
.dcr_wr_valid (dcr_wr_valid), .dcr_wr_valid (dcr_wr_valid),
.dcr_wr_addr (dcr_wr_addr), .dcr_wr_addr (dcr_wr_addr),
.dcr_wr_data (dcr_wr_data) .dcr_wr_data (dcr_wr_data)
); );
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET);
end end
`SCOPE_IO_SWITCH (2) `SCOPE_IO_SWITCH (2)
Vortex_axi #( Vortex_axi #(
.AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH), .AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) vortex_axi ( ) vortex_axi (

View file

@ -16,16 +16,17 @@
module vortex_afu #( module vortex_afu #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
) ( ) (
// System signals // System signals
input wire ap_clk, input wire ap_clk,
input wire ap_rst_n, input wire ap_rst_n,
// AXI4 master interface // AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface // AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid, input wire s_axi_ctrl_awvalid,
@ -54,12 +55,13 @@ module vortex_afu #(
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
.C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) afu_wrap ( ) afu_wrap (
.clk (ap_clk), .clk (ap_clk),
.reset (~ap_rst_n), .reset (~ap_rst_n),
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
.s_axi_ctrl_awready (s_axi_ctrl_awready), .s_axi_ctrl_awready (s_axi_ctrl_awready),

View file

@ -14,20 +14,24 @@
`ifndef VORTEX_AFU_VH `ifndef VORTEX_AFU_VH
`define VORTEX_AFU_VH `define VORTEX_AFU_VH
`ifndef M_AXI_MEM_NUM_BANKS `ifndef PLATFORM_MEMORY_BANKS
`define M_AXI_MEM_NUM_BANKS 1 `define PLATFORM_MEMORY_BANKS 2
`endif `endif
`ifndef M_AXI_MEM_ADDR_WIDTH `ifndef PLATFORM_MEMORY_ADDR_WIDTH
`define M_AXI_MEM_ADDR_WIDTH 34 `define PLATFORM_MEMORY_ADDR_WIDTH 25
`endif `endif
`ifndef M_AXI_MEM_DATA_WIDTH `ifndef PLATFORM_MEMORY_DATA_WIDTH
`define M_AXI_MEM_DATA_WIDTH 512 `define PLATFORM_MEMORY_DATA_WIDTH 512
`endif `endif
`ifndef M_AXI_MEM_ID_WIDTH `ifndef PLATFORM_MEMORY_OFFSET
`define M_AXI_MEM_ID_WIDTH 32 `define PLATFORM_MEMORY_OFFSET 0
`endif
`ifndef PLATFORM_MEMORY_ID_WIDTH
`define PLATFORM_MEMORY_ID_WIDTH 32
`endif `endif
`define GEN_AXI_MEM(i) \ `define GEN_AXI_MEM(i) \

View file

@ -21,6 +21,8 @@ module VX_avs_adapter #(
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
parameter TAG_WIDTH = 1, parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1, parameter RD_QUEUE_SIZE = 1,
parameter BANK_INTERLEAVE= 0,
parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS),
parameter REQ_OUT_BUF = 0, parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0 parameter RSP_OUT_BUF = 0
) ( ) (
@ -45,7 +47,7 @@ module VX_avs_adapter #(
// AVS bus // AVS bus
output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS], output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS], output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS], output wire avs_read [NUM_BANKS],
@ -54,26 +56,33 @@ module VX_avs_adapter #(
input wire avs_readdatavalid [NUM_BANKS] input wire avs_readdatavalid [NUM_BANKS]
); );
localparam DATA_SIZE = DATA_WIDTH/8; localparam DATA_SIZE = DATA_WIDTH/8;
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
`STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter"))
// Requests handling ////////////////////////////////////////////////////// // Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full; wire [NUM_BANKS-1:0] req_queue_going_full;
wire [BANK_ADDRW-1:0] req_bank_sel; wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
wire [BANK_OFFSETW-1:0] req_bank_off; wire [BANK_OFFSETW-1:0] req_bank_off;
wire [NUM_BANKS-1:0] bank_req_ready; wire [NUM_BANKS-1:0] bank_req_ready;
if (NUM_BANKS > 1) begin : g_bank_sel if (NUM_BANKS > 1) begin : g_bank_sel
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; if (BANK_INTERLEAVE) begin : g_interleave
end else begin : g_bank_sel assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_sel = '0; assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
end end
assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push
assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i);
@ -142,7 +151,7 @@ module VX_avs_adapter #(
assign avs_read[i] = valid_out && ~rw_out; assign avs_read[i] = valid_out && ~rw_out;
assign avs_write[i] = valid_out && rw_out; assign avs_write[i] = valid_out && rw_out;
assign avs_address[i] = ADDR_WIDTH'(addr_out); assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out);
assign avs_byteenable[i] = byteen_out; assign avs_byteenable[i] = byteen_out;
assign avs_writedata[i] = data_out; assign avs_writedata[i] = data_out;
assign avs_burstcount[i] = BURST_WIDTH'(1); assign avs_burstcount[i] = BURST_WIDTH'(1);

View file

@ -19,7 +19,8 @@ module VX_axi_adapter #(
parameter ADDR_WIDTH = 32, parameter ADDR_WIDTH = 32,
parameter TAG_WIDTH = 8, parameter TAG_WIDTH = 8,
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)),
parameter BANK_INTERLEAVE= 0,
parameter RSP_OUT_BUF = 0 parameter RSP_OUT_BUF = 0
) ( ) (
input wire clk, input wire clk,
@ -29,7 +30,7 @@ module VX_axi_adapter #(
input wire mem_req_valid, input wire mem_req_valid,
input wire mem_req_rw, input wire mem_req_rw,
input wire [DATA_WIDTH/8-1:0] mem_req_byteen, input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr, input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data, input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [TAG_WIDTH-1:0] mem_req_tag, input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready, output wire mem_req_ready,
@ -43,7 +44,7 @@ module VX_axi_adapter #(
// AXI write request address channel // AXI write request address channel
output wire m_axi_awvalid [NUM_BANKS], output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS], input wire m_axi_awready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS],
output wire [2:0] m_axi_awsize [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS],
@ -70,7 +71,7 @@ module VX_axi_adapter #(
// AXI read address channel // AXI read address channel
output wire m_axi_arvalid [NUM_BANKS], output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS], input wire m_axi_arready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS],
@ -89,15 +90,28 @@ module VX_axi_adapter #(
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
input wire [1:0] m_axi_rresp [NUM_BANKS] input wire [1:0] m_axi_rresp [NUM_BANKS]
); );
localparam AXSIZE = `CLOG2(DATA_WIDTH/8); localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8);
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS;
localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8);
wire [BANK_ADDRW-1:0] req_bank_sel; `STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH))
if (NUM_BANKS > 1) begin : g_req_bank_sel
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; wire [BANK_SEL_WIDTH-1:0] req_bank_sel;
end else begin : g_req_bank_sel_0 wire [BANK_OFFSETW-1:0] req_bank_off;
if (NUM_BANKS > 1) begin : g_bank_sel
if (BANK_INTERLEAVE) begin : g_interleave
assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0];
assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW];
end else begin : g_no_interleave
assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS];
assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0];
end
end else begin : g_no_bank_sel
assign req_bank_sel = '0; assign req_bank_sel = '0;
assign req_bank_off = mem_req_addr;
end end
wire mem_req_fire = mem_req_valid && mem_req_ready; wire mem_req_fire = mem_req_valid && mem_req_ready;
@ -134,10 +148,10 @@ module VX_axi_adapter #(
// AXI write request address channel // AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
assign m_axi_awid[i] = mem_req_tag; assign m_axi_awid[i] = mem_req_tag;
assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(AXSIZE); assign m_axi_awsize[i] = 3'(DATA_SIZE);
assign m_axi_awburst[i] = 2'b00; assign m_axi_awburst[i] = 2'b00;
assign m_axi_awlock[i] = 2'b00; assign m_axi_awlock[i] = 2'b00;
assign m_axi_awcache[i] = 4'b0000; assign m_axi_awcache[i] = 4'b0000;
@ -166,10 +180,10 @@ module VX_axi_adapter #(
// AXI read request channel // AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off);
assign m_axi_arid[i] = mem_req_tag; assign m_axi_arid[i] = mem_req_tag;
assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(AXSIZE); assign m_axi_arsize[i] = 3'(DATA_SIZE);
assign m_axi_arburst[i] = 2'b00; assign m_axi_arburst[i] = 2'b00;
assign m_axi_arlock[i] = 2'b00; assign m_axi_arlock[i] = 2'b00;
assign m_axi_arcache[i] = 4'b0000; assign m_axi_arcache[i] = 4'b0000;

View file

@ -53,8 +53,6 @@ module VX_mem_adapter #(
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out, input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
output wire mem_rsp_ready_out output wire mem_rsp_ready_out
); );
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8); localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH); localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH); localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH);
@ -74,6 +72,7 @@ module VX_mem_adapter #(
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w; wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
wire mem_rsp_ready_in_w; wire mem_rsp_ready_in_w;
`UNUSED_VAR (mem_req_tag_in)
`UNUSED_VAR (mem_rsp_tag_out) `UNUSED_VAR (mem_rsp_tag_out)
if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data
@ -122,7 +121,7 @@ module VX_mem_adapter #(
assign mem_rsp_valid_in_w = mem_rsp_valid_out; assign mem_rsp_valid_in_w = mem_rsp_valid_out;
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx]; assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]);
assign mem_rsp_ready_out = mem_rsp_ready_in_w; assign mem_rsp_ready_out = mem_rsp_ready_in_w;
end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data

View file

@ -7,17 +7,21 @@ include ../../common.mk
# AFU parameters # AFU parameters
CONFIGS += -DNOPAE CONFIGS += -DNOPAE
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
endif endif
#CONFIGS += -DNUM_CORES=2 #CONFIGS += -DNUM_CORES=2

View file

@ -98,7 +98,7 @@ ifdef PERF
endif endif
# ast dump flags # ast dump flags
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
all: swconfig ip-gen setup build all: swconfig ip-gen setup build

View file

@ -35,6 +35,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8 #define VX_CAPS_NUM_MEM_BANKS 0x8
#define VX_CAPS_MEM_BANK_SIZE 0x9
// device isa flags // device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A) #define VX_ISA_STD_A (1ull << ISA_STD_A)

View file

@ -163,11 +163,6 @@ public:
}); });
{ {
// retrieve FPGA global memory size
CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), {
global_mem_size_ = GLOBAL_MEM_SIZE;
});
// Load ISA CAPS // Load ISA CAPS
CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), { CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), {
api_.fpgaClose(fpga_); api_.fpgaClose(fpga_);
@ -179,6 +174,12 @@ public:
api_.fpgaClose(fpga_); api_.fpgaClose(fpga_);
return -1; return -1;
}); });
// Determine global memory size
uint64_t num_banks, bank_size;
this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
global_mem_size_ = num_banks * bank_size;
} }
#ifdef SCOPE #ifdef SCOPE
@ -231,7 +232,10 @@ public:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS: case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS; _value = 1 << ((dev_caps_ >> 48) & 0x7);
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
break; break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);

View file

@ -80,6 +80,9 @@ public:
case VX_CAPS_NUM_MEM_BANKS: case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS; _value = MEMORY_BANKS;
break; break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -84,6 +84,9 @@ public:
case VX_CAPS_NUM_MEM_BANKS: case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS; _value = MEMORY_BANKS;
break; break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
break;
default: default:
std::cout << "invalid caps id: " << caps_id << std::endl; std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort(); std::abort();

View file

@ -8,6 +8,7 @@ SRC_DIR := $(VORTEX_HOME)/runtime/xrt
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common
CXXFLAGS += -DXLEN_$(XLEN)
CXXFLAGS += -fPIC CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread LDFLAGS += -shared -pthread

View file

@ -49,7 +49,6 @@ using namespace vortex;
#define MMIO_ISA_ADDR 0x1C #define MMIO_ISA_ADDR 0x1C
#define MMIO_DCR_ADDR 0x28 #define MMIO_DCR_ADDR 0x28
#define MMIO_SCP_ADDR 0x34 #define MMIO_SCP_ADDR 0x34
#define MMIO_MEM_ADDR 0x40
#define CTL_AP_START (1 << 0) #define CTL_AP_START (1 << 0)
#define CTL_AP_DONE (1 << 1) #define CTL_AP_DONE (1 << 1)
@ -58,24 +57,6 @@ using namespace vortex;
#define CTL_AP_RESET (1 << 4) #define CTL_AP_RESET (1 << 4)
#define CTL_AP_RESTART (1 << 7) #define CTL_AP_RESTART (1 << 7)
struct platform_info_t {
const char *prefix_name;
uint8_t lg2_num_banks;
uint8_t lg2_bank_size;
uint64_t mem_base;
};
static const platform_info_t g_platforms[] = {
{"vortex_xrtsim", 0, 32, 0x0}, // 16 x 256 MB = 4 GB
{"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
{"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
{"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2
{"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4
{"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4
};
#ifdef CPP_API #ifdef CPP_API
typedef xrt::device xrt_device_t; typedef xrt::device xrt_device_t;
@ -113,18 +94,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) {
} }
#endif #endif
static int get_platform_info(const std::string &device_name,
platform_info_t *platform_info) {
for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) {
auto &platform = g_platforms[i];
if (device_name.rfind(platform.prefix_name, 0) == 0) {
*platform_info = platform;
return 0;
}
}
return -1;
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
class vx_device { class vx_device {
@ -181,58 +150,6 @@ public:
auto xclbin = xrt::xclbin(xlbin_path_s); auto xclbin = xrt::xclbin(xlbin_path_s);
auto device_name = xrtDevice.get_info<xrt::info::device::name>(); auto device_name = xrtDevice.get_info<xrt::info::device::name>();
/*{
uint32_t num_banks = 0;
uint64_t bank_size = 0;
uint64_t mem_base = 0;
auto mem_json =
nlohmann::json::parse(xrtDevice.get_info<xrt::info::device::memory>()); if
(!mem_json.is_null()) { uint32_t index = 0; for (auto& mem :
mem_json["board"]["memory"]["memories"]) { auto enabled =
mem["enabled"].get<std::string>(); if (enabled == "true") { if (index == 0)
{ mem_base = std::stoull(mem["base_address"].get<std::string>(), nullptr,
16); bank_size = std::stoull(mem["range_bytes"].get<std::string>(), nullptr,
16);
}
++index;
}
}
num_banks = index;
}
fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx,
count=%d\n", mem_base, bank_size, num_banks);
}*/
/*{
std::cout << "Device" << device_index << " : " <<
xrtDevice.get_info<xrt::info::device::name>() << std::endl; std::cout << "
bdf : " << xrtDevice.get_info<xrt::info::device::bdf>() << std::endl;
std::cout << " kdma : " <<
xrtDevice.get_info<xrt::info::device::kdma>() << std::endl; std::cout << "
max_freq : " <<
xrtDevice.get_info<xrt::info::device::max_clock_frequency_mhz>() <<
std::endl; std::cout << " memory : " <<
xrtDevice.get_info<xrt::info::device::memory>() << std::endl; std::cout << "
thermal : " << xrtDevice.get_info<xrt::info::device::thermal>() <<
std::endl; std::cout << " m2m : " << std::boolalpha <<
xrtDevice.get_info<xrt::info::device::m2m>() << std::dec << std::endl;
std::cout << " nodma : " << std::boolalpha <<
xrtDevice.get_info<xrt::info::device::nodma>() << std::dec << std::endl;
std::cout << "Memory info :" << std::endl;
for (const auto& mem_bank : xclbin.get_mems()) {
std::cout << " index : " << mem_bank.get_index() << std::endl;
std::cout << " tag : " << mem_bank.get_tag() << std::endl;
std::cout << " type : " << (int)mem_bank.get_type() << std::endl;
std::cout << " base_address : 0x" << std::hex <<
mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" <<
(mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << "
used :" << mem_bank.get_used() << std::endl;
}
}*/
#else #else
CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), { CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), {
@ -275,11 +192,6 @@ public:
printf("info: device name=%s.\n", device_name.c_str()); printf("info: device name=%s.\n", device_name.c_str());
CHECK_ERR(get_platform_info(device_name, &platform_), {
fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str());
return err;
});
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), {
return err; return err;
}); });
@ -300,36 +212,13 @@ public:
return err; return err;
}); });
uint32_t num_banks = 1 << platform_.lg2_num_banks; uint64_t num_banks;
uint64_t bank_size = 1ull << platform_.lg2_bank_size; this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks);
lg2_num_banks_ = log2ceil(num_banks);
// adjust memory banks allocation to architecture limit uint64_t bank_size;
int isa_arch = VX_ISA_ARCH(isa_caps_); this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size);
if (isa_arch == 32) { lg2_bank_size_ = log2ceil(bank_size);
uint64_t max_mem_size = 1ull << 32;
uint32_t need_num_banks = max_mem_size / bank_size;
if (num_banks > need_num_banks) {
printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks);
num_banks = need_num_banks;
platform_.lg2_num_banks = log2ceil(num_banks);
}
}
for (uint32_t i = 0; i < num_banks; ++i) {
uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12);
uint64_t reg_value = platform_.mem_base + i * bank_size;
CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), {
return err;
});
CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), {
return err;
});
#ifndef BANK_INTERLEAVE
break;
#endif
}
global_mem_size_ = num_banks * bank_size; global_mem_size_ = num_banks * bank_size;
@ -418,7 +307,10 @@ public:
_value = isa_caps_; _value = isa_caps_;
break; break;
case VX_CAPS_NUM_MEM_BANKS: case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS; _value = 1 << ((dev_caps_ >> 48) & 0x7);
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f));
break; break;
default: default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
@ -734,23 +626,23 @@ private:
MemoryAllocator global_mem_; MemoryAllocator global_mem_;
xrt_device_t xrtDevice_; xrt_device_t xrtDevice_;
xrt_kernel_t xrtKernel_; xrt_kernel_t xrtKernel_;
platform_info_t platform_;
uint64_t dev_caps_; uint64_t dev_caps_;
uint64_t isa_caps_; uint64_t isa_caps_;
uint64_t global_mem_size_; uint64_t global_mem_size_;
DeviceConfig dcrs_; DeviceConfig dcrs_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_; std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
uint32_t lg2_num_banks_;
uint32_t lg2_bank_size_;
#ifdef BANK_INTERLEAVE #ifdef BANK_INTERLEAVE
std::vector<xrt_buffer_t> xrtBuffers_; std::vector<xrt_buffer_t> xrtBuffers_;
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
uint32_t num_banks = 1 << platform_.lg2_num_banks; uint32_t num_banks = 1 << lg2_num_banks_;
uint64_t block_addr = addr / CACHE_BLOCK_SIZE; uint64_t block_addr = addr / CACHE_BLOCK_SIZE;
uint32_t index = block_addr & (num_banks - 1); uint32_t index = block_addr & (num_banks - 1);
uint64_t offset = uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE;
(block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE;
if (pIdx) { if (pIdx) {
*pIdx = index; *pIdx = index;
} }
@ -778,9 +670,9 @@ private:
std::unordered_map<uint32_t, buf_cnt_t> xrtBuffers_; std::unordered_map<uint32_t, buf_cnt_t> xrtBuffers_;
int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) {
uint32_t num_banks = 1 << platform_.lg2_num_banks; uint32_t num_banks = 1 << lg2_num_banks_;
uint64_t bank_size = 1ull << platform_.lg2_bank_size; uint64_t bank_size = 1ull << lg2_bank_size_;
uint32_t index = addr >> platform_.lg2_bank_size; uint32_t index = addr >> lg2_bank_size_;
uint64_t offset = addr & (bank_size - 1); uint64_t offset = addr & (bank_size - 1);
if (index > num_banks) { if (index > num_banks) {
fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr); fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr);
@ -807,7 +699,7 @@ private:
} }
} else { } else {
printf("allocating bank%d...\n", bank_id); printf("allocating bank%d...\n", bank_id);
uint64_t bank_size = 1ull << platform_.lg2_bank_size; uint64_t bank_size = 1ull << lg2_bank_size_;
#ifdef CPP_API #ifdef CPP_API
xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id); xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id);
#else #else

View file

@ -20,30 +20,58 @@ constexpr uint32_t count_leading_zeros(uint32_t value) {
return value ? __builtin_clz(value) : 32; return value ? __builtin_clz(value) : 32;
} }
// Counts the zero bits above the most significant set bit of a 64-bit value.
// A zero input is answered explicitly with 64, since __builtin_clzll has no
// defined result when its argument is 0.
constexpr uint32_t count_leading_zeros(uint64_t value) {
  return (value == 0) ? 64 : __builtin_clzll(value);
}
constexpr uint32_t count_trailing_zeros(uint32_t value) { constexpr uint32_t count_trailing_zeros(uint32_t value) {
return value ? __builtin_ctz(value) : 32; return value ? __builtin_ctz(value) : 32;
} }
// Counts the zero bits below the least significant set bit of a 64-bit value.
// A zero input is answered explicitly with 64, since __builtin_ctzll has no
// defined result when its argument is 0.
constexpr uint32_t count_trailing_zeros(uint64_t value) {
  return (value == 0) ? 64 : __builtin_ctzll(value);
}
constexpr bool ispow2(uint32_t value) { constexpr bool ispow2(uint32_t value) {
return value && !(value & (value - 1)); return value && !(value & (value - 1));
} }
// True when exactly one bit of the 64-bit value is set.
// Clearing the lowest set bit (value & (value - 1)) leaves zero only for a
// single-bit value; zero itself is rejected up front.
constexpr bool ispow2(uint64_t value) {
  return (value != 0) && ((value & (value - 1)) == 0);
}
constexpr uint32_t log2ceil(uint32_t value) { constexpr uint32_t log2ceil(uint32_t value) {
return 32 - count_leading_zeros(value - 1); return 32 - count_leading_zeros(value - 1);
} }
// Ceiling of log2 for a 64-bit value: the bit width of (value - 1).
// value == 1 yields 0. value == 0 wraps to all-ones before the count and
// therefore yields 64.
constexpr uint32_t log2ceil(uint64_t value) {
  return 64u - count_leading_zeros(value - 1);
}
inline unsigned log2up(uint32_t value) { inline unsigned log2up(uint32_t value) {
return std::max<uint32_t>(1, log2ceil(value)); return std::max<uint32_t>(1, log2ceil(value));
} }
// Same as log2ceil for a 64-bit value, but never returns less than 1.
inline unsigned log2up(uint64_t value) {
  const uint32_t bits = log2ceil(value);
  return (bits < 1) ? 1 : bits;
}
constexpr unsigned log2floor(uint32_t value) { constexpr unsigned log2floor(uint32_t value) {
return 31 - count_leading_zeros(value); return 31 - count_leading_zeros(value);
} }
// Floor of log2 for a 64-bit value, i.e. the index of its highest set bit.
// Meaningful for non-zero input only: with value == 0 the leading-zero count
// is 64 and the unsigned subtraction wraps around.
constexpr unsigned log2floor(uint64_t value) {
  return 63u - count_leading_zeros(value);
}
constexpr unsigned ceil2(uint32_t value) { constexpr unsigned ceil2(uint32_t value) {
return 32 - count_leading_zeros(value); return 32 - count_leading_zeros(value);
} }
// Bit width of a 64-bit value: index of the highest set bit plus one,
// and 0 when no bit is set.
constexpr unsigned ceil2(uint64_t value) {
  return 64u - count_leading_zeros(value);
}
inline uint64_t bit_clr(uint64_t bits, uint32_t index) { inline uint64_t bit_clr(uint64_t bits, uint32_t index) {
assert(index <= 63); assert(index <= 63);
return bits & ~(1ull << index); return bits & ~(1ull << index);

View file

@ -71,13 +71,14 @@ public:
// Check if the reservation is within memory capacity bounds // Check if the reservation is within memory capacity bounds
if (addr + size > capacity_) { if (addr + size > capacity_) {
printf("error: address range out of bounds\n"); printf("error: address range out of bounds - requested=0x%lx, capacity=0x%lx\n", (addr + size), capacity_);
return -1; return -1;
} }
// Ensure the reservation does not overlap with existing pages // Ensure the reservation does not overlap with existing pages
if (hasPageOverlap(addr, size)) { uint64_t overlapStart, overlapEnd;
printf("error: address range overlaps with existing allocation\n"); if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) {
printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd);
return -1; return -1;
} }
@ -509,15 +510,15 @@ private:
return false; return false;
} }
bool hasPageOverlap(uint64_t start, uint64_t size) { bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) {
page_t* current = pages_; page_t* current = pages_;
while (current != nullptr) { while (current != nullptr) {
uint64_t pageStart = current->addr; uint64_t pageStart = current->addr;
uint64_t pageEnd = pageStart + current->size; uint64_t pageEnd = pageStart + current->size;
uint64_t requestEnd = start + size; uint64_t end = start + size;
if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page if ((start <= pageEnd) && (end >= pageStart)) {
(requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page *overlapStart = pageStart;
(start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page *overlapEnd = pageEnd;
return true; return true;
} }
current = current->next; current = current->next;

View file

@ -32,18 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters # AFU parameters
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) CONFIGS += -DPLATFORM_MEMORY_BANKS=2
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
endif endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
endif endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)

View file

@ -35,7 +35,7 @@
#include <unordered_map> #include <unordered_map>
#include <util.h> #include <util.h>
#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) #define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO #ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1 #define MEM_CLOCK_RATIO 1
@ -145,6 +145,9 @@ public:
// allocate RAM // allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE); ram_ = new RAM(0, RAM_PAGE_SIZE);
// calculate memory bank size
mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE;
// reset the device // reset the device
this->reset(); this->reset();
@ -406,14 +409,14 @@ private:
} }
void avs_bus_reset() { void avs_bus_reset() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
device_->avs_readdatavalid[b] = 0; device_->avs_readdatavalid[b] = 0;
device_->avs_waitrequest[b] = 0; device_->avs_waitrequest[b] = 0;
} }
} }
void avs_bus_eval() { void avs_bus_eval() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// process memory responses // process memory responses
device_->avs_readdatavalid[b] = 0; device_->avs_readdatavalid[b] = 0;
if (!pending_mem_reqs_[b].empty() if (!pending_mem_reqs_[b].empty()
@ -421,7 +424,7 @@ private:
auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_rd_it = pending_mem_reqs_[b].begin();
auto mem_req = *mem_rd_it; auto mem_req = *mem_rd_it;
device_->avs_readdatavalid[b] = 1; device_->avs_readdatavalid[b] = 1;
memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE);
uint32_t addr = mem_req->addr; uint32_t addr = mem_req->addr;
pending_mem_reqs_[b].erase(mem_rd_it); pending_mem_reqs_[b].erase(mem_rd_it);
delete mem_req; delete mem_req;
@ -429,19 +432,20 @@ private:
// process memory requests // process memory requests
assert(!device_->avs_read[b] || !device_->avs_write[b]); assert(!device_->avs_read[b] || !device_->avs_write[b]);
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE;
if (device_->avs_write[b]) { if (device_->avs_write[b]) {
// process write request
uint64_t byteen = device_->avs_byteenable[b]; uint64_t byteen = device_->avs_byteenable[b];
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) { if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i]; (*ram_)[byte_addr + i] = data[i];
} }
} }
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr); /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen);
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); printf("%02x", data[i]);
} }
printf("\n");*/ printf("\n");*/
@ -455,22 +459,20 @@ private:
dram_queue_.push(mem_req); dram_queue_.push(mem_req);
} else } else
if (device_->avs_read[b]) { if (device_->avs_read[b]) {
// process read request
auto mem_req = new mem_req_t(); auto mem_req = new mem_req_t();
mem_req->addr = device_->avs_address[b]; mem_req->addr = device_->avs_address[b];
mem_req->bank_id = b; mem_req->bank_id = b;
ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false; mem_req->write = false;
mem_req->ready = false; mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req); pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr);
for (auto& req : pending_mem_reqs_[b]) { for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
if (req.cycles_left != 0) printf("%02x", mem_req->data[i]);
printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
else
printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
} }
printf("}\n");*/ printf("\n");*/
// send dram request // send dram request
dram_queue_.push(mem_req); dram_queue_.push(mem_req);
@ -481,7 +483,7 @@ private:
} }
typedef struct { typedef struct {
std::array<uint8_t, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE> data; std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
uint32_t addr; uint32_t addr;
uint32_t bank_id; uint32_t bank_id;
bool write; bool write;
@ -514,9 +516,10 @@ private:
bool stop_; bool stop_;
std::unordered_map<int64_t, host_buffer_t> host_buffers_; std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_; uint64_t host_buffer_ids_;
uint64_t mem_bank_size_;
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_; std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_; std::list<cci_wr_req_t> cci_writes_;

View file

@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
output t_ccip_mmioData af2cp_sTxPort_c2_data, output t_ccip_mmioData af2cp_sTxPort_c2_data,
// Avalon signals for local memory access // Avalon signals for local memory access
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS],
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output logic avs_write [`PLATFORM_MEMORY_BANKS],
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output logic avs_read [`PLATFORM_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] input avs_readdatavalid [`PLATFORM_MEMORY_BANKS]
); );
t_if_ccip_Rx cp2af_sRxPort; t_if_ccip_Rx cp2af_sRxPort;
t_if_ccip_Tx af2cp_sTxPort; t_if_ccip_Tx af2cp_sTxPort;
vortex_afu #( vortex_afu #(
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS) .NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS)
) afu ( ) afu (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),

View file

@ -32,14 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
# AFU parameters # AFU parameters
ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_NUM_BANKS=1 CONFIGS += -DPLATFORM_MEMORY_BANKS=2
endif endif
ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS))) ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32 ifeq ($(XLEN),64)
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41
else
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25
endif endif
ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS))) endif
CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512 ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS)))
CONFIGS += -DPLATFORM_MEMORY_OFFSET=0
endif endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)

View file

@ -11,22 +11,22 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
`include "VX_platform.vh"
`include "vortex_afu.vh" `include "vortex_afu.vh"
module vortex_afu_shim #( module vortex_afu_shim #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = 64, parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8),
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
) ( ) (
// System signals // System signals
input wire ap_clk, input wire ap_clk,
input wire ap_rst_n, input wire ap_rst_n,
// AXI4 master interface // AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface // AXI4-Lite slave interface
input wire s_axi_ctrl_awvalid, input wire s_axi_ctrl_awvalid,
@ -50,17 +50,19 @@ module vortex_afu_shim #(
output wire interrupt output wire interrupt
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
); );
vortex_afu #( VX_afu_wrap #(
.C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH),
.C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH) .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH),
) afu ( .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
.ap_clk(ap_clk), ) afu_wrap (
.ap_rst_n(ap_rst_n), .clk (ap_clk),
// AXI4 master interface .reset (~ap_rst_n),
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
.s_axi_ctrl_awready (s_axi_ctrl_awready), .s_axi_ctrl_awready (s_axi_ctrl_awready),
.s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr),
@ -78,6 +80,7 @@ module vortex_afu_shim #(
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid), .s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
.s_axi_ctrl_bready (s_axi_ctrl_bready), .s_axi_ctrl_bready (s_axi_ctrl_bready),
.s_axi_ctrl_bresp (s_axi_ctrl_bresp), .s_axi_ctrl_bresp (s_axi_ctrl_bresp),
.interrupt (interrupt) .interrupt (interrupt)
); );

View file

@ -37,7 +37,7 @@
#include <iostream> #include <iostream>
#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8) #define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO #ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1 #define MEM_CLOCK_RATIO 1
@ -59,10 +59,24 @@
#define RAM_PAGE_SIZE 4096 #define RAM_PAGE_SIZE 4096
#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH)
#define CPU_GPU_LATENCY 200 #define CPU_GPU_LATENCY 200
// Verilator C++ types used for the AXI memory address and data signals.
//
// Address: QData when PLATFORM_MEMORY_ADDR_WIDTH is above 32, IData otherwise.
// NOTE(review): the choice depends on PLATFORM_MEMORY_ADDR_WIDTH alone; if the
// generated address port is wider than that macro (e.g. it also carries
// byte-offset bits), this threshold may pick the wrong type. Confirm against
// the Verilated model header.
#if PLATFORM_MEMORY_ADDR_WIDTH <= 32
using Vl_m_addr_t = IData;
#else
using Vl_m_addr_t = QData;
#endif

// Data: VlWide of (width / 32) words above 64 bits, QData above 32 bits,
// IData otherwise.
#if PLATFORM_MEMORY_DATA_WIDTH > 64
using Vl_m_data_t = VlWide<(PLATFORM_MEMORY_DATA_WIDTH / 32)>;
#elif PLATFORM_MEMORY_DATA_WIDTH > 32
using Vl_m_data_t = QData;
#else
using Vl_m_data_t = IData;
#endif
using namespace vortex; using namespace vortex;
static uint64_t timestamp = 0; static uint64_t timestamp = 0;
@ -134,7 +148,7 @@ public:
if (future_.valid()) { if (future_.valid()) {
future_.wait(); future_.wait();
} }
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
delete mem_alloc_[i]; delete mem_alloc_[i];
} }
if (ram_) { if (ram_) {
@ -169,15 +183,18 @@ public:
tfp_->open("trace.vcd"); tfp_->open("trace.vcd");
#endif #endif
// calculate memory bank size
mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE;
// allocate RAM // allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE); ram_ = new RAM(0, RAM_PAGE_SIZE);
// initialize AXI memory interfaces // initialize AXI memory interfaces
MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS); MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
// initialize memory allocator // initialize memory allocator
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64); mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
} }
// reset the device // reset the device
@ -198,13 +215,13 @@ public:
} }
// Allocates `size` bytes from the allocator that belongs to memory bank
// `bank_id`.
// Returns -1 when `bank_id` is not a valid bank index; otherwise returns
// whatever MemoryAllocator::allocate() returns for (size, addr).
// `addr` is forwarded untouched to the allocator — presumably it receives the
// allocated bank-relative address; confirm against MemoryAllocator.
// NOTE(review): no lock is taken here, unlike mem_write()/mem_read() which
// guard with mutex_ — confirm the allocators are not shared across threads.
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
if (bank_id >= M_AXI_MEM_NUM_BANKS) if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1; return -1;
return mem_alloc_[bank_id]->allocate(size, addr); return mem_alloc_[bank_id]->allocate(size, addr);
} }
// Releases the allocation at `addr` in the allocator that belongs to memory
// bank `bank_id`.
// Returns -1 when `bank_id` is not a valid bank index; otherwise returns
// whatever MemoryAllocator::release() returns for `addr`.
// NOTE(review): `addr` is presumably the bank-relative address previously
// handed out by mem_alloc() for the same bank — confirm against callers.
int mem_free(uint32_t bank_id, uint64_t addr) { int mem_free(uint32_t bank_id, uint64_t addr) {
if (bank_id >= M_AXI_MEM_NUM_BANKS) if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1; return -1;
return mem_alloc_[bank_id]->release(addr); return mem_alloc_[bank_id]->release(addr);
} }
@ -212,11 +229,11 @@ public:
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
std::lock_guard<std::mutex> guard(mutex_); std::lock_guard<std::mutex> guard(mutex_);
if (bank_id >= M_AXI_MEM_NUM_BANKS) if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1; return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->write(data, base_addr, size); ram_->write(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); /*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) { for (int i = size-1; i >= 0; --i) {
printf("%02x", ((const uint8_t*)data)[i]); printf("%02x", ((const uint8_t*)data)[i]);
} }
@ -227,11 +244,11 @@ public:
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
std::lock_guard<std::mutex> guard(mutex_); std::lock_guard<std::mutex> guard(mutex_);
if (bank_id >= M_AXI_MEM_NUM_BANKS) if (bank_id >= PLATFORM_MEMORY_BANKS)
return -1; return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; uint64_t base_addr = bank_id * mem_bank_size_ + addr;
ram_->read(data, base_addr, size); ram_->read(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); /*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size);
for (int i = size-1; i >= 0; --i) { for (int i = size-1; i >= 0; --i) {
printf("%02x", ((uint8_t*)data)[i]); printf("%02x", ((uint8_t*)data)[i]);
} }
@ -307,7 +324,7 @@ private:
reqs.clear(); reqs.clear();
} }
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
std::queue<mem_req_t*> empty; std::queue<mem_req_t*> empty;
std::swap(dram_queues_[i], empty); std::swap(dram_queues_[i], empty);
} }
@ -334,7 +351,7 @@ private:
void tick() { void tick() {
this->axi_mem_bus_eval(); this->axi_mem_bus_eval();
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
if (!dram_queues_[i].empty()) { if (!dram_queues_[i].empty()) {
auto mem_req = dram_queues_[i].front(); auto mem_req = dram_queues_[i].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
@ -394,7 +411,7 @@ private:
} }
void axi_mem_bus_reset() { void axi_mem_bus_reset() {
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// address read request // address read request
*m_axi_mem_[i].arready = 1; *m_axi_mem_[i].arready = 1;
@ -418,7 +435,7 @@ private:
} }
void axi_mem_bus_eval() { void axi_mem_bus_eval() {
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// handle read responses // handle read responses
if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) {
*m_axi_mem_[i].rvalid = 0; *m_axi_mem_[i].rvalid = 0;
@ -434,7 +451,7 @@ private:
*m_axi_mem_[i].rid = mem_rsp->tag; *m_axi_mem_[i].rid = mem_rsp->tag;
*m_axi_mem_[i].rresp = 0; *m_axi_mem_[i].rresp = 0;
*m_axi_mem_[i].rlast = 1; *m_axi_mem_[i].rlast = 1;
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE); memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
pending_mem_reqs_[i].erase(mem_rsp_it); pending_mem_reqs_[i].erase(mem_rsp_it);
m_axi_states_[i].read_rsp_pending = true; m_axi_states_[i].read_rsp_pending = true;
delete mem_rsp; delete mem_rsp;
@ -465,14 +482,14 @@ private:
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
auto mem_req = new mem_req_t(); auto mem_req = new mem_req_t();
mem_req->tag = *m_axi_mem_[i].arid; mem_req->tag = *m_axi_mem_[i].arid;
mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE;
ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false; mem_req->write = false;
mem_req->ready = false; mem_req->ready = false;
pending_mem_reqs_[i].emplace_back(mem_req); pending_mem_reqs_[i].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]); printf("%02x", mem_req->data[i]);
} }
printf("\n");*/ printf("\n");*/
@ -494,9 +511,9 @@ private:
auto byteen = *m_axi_mem_[i].wstrb; auto byteen = *m_axi_mem_[i].wstrb;
auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE;
for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) { for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
if ((byteen >> i) & 0x1) { if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i]; (*ram_)[byte_addr + i] = data[i];
} }
@ -510,7 +527,7 @@ private:
pending_mem_reqs_[i].emplace_back(mem_req); pending_mem_reqs_[i].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]); printf("%02x", data[i]);
} }
printf("\n");*/ printf("\n");*/
@ -535,7 +552,7 @@ private:
} m_axi_state_t; } m_axi_state_t;
typedef struct { typedef struct {
std::array<uint8_t, M_AXI_MEM_DATA_SIZE> data; std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> data;
uint32_t tag; uint32_t tag;
uint64_t addr; uint64_t addr;
bool write; bool write;
@ -545,22 +562,22 @@ private:
typedef struct { typedef struct {
CData* awvalid; CData* awvalid;
CData* awready; CData* awready;
QData* awaddr; Vl_m_addr_t* awaddr;
IData* awid; IData* awid;
CData* awlen; CData* awlen;
CData* wvalid; CData* wvalid;
CData* wready; CData* wready;
VlWide<16>* wdata; Vl_m_data_t* wdata;
QData* wstrb; QData* wstrb;
CData* wlast; CData* wlast;
CData* arvalid; CData* arvalid;
CData* arready; CData* arready;
QData* araddr; Vl_m_addr_t* araddr;
IData* arid; IData* arid;
CData* arlen; CData* arlen;
CData* rvalid; CData* rvalid;
CData* rready; CData* rready;
VlWide<16>* rdata; Vl_m_data_t* rdata;
CData* rlast; CData* rlast;
IData* rid; IData* rid;
CData* rresp; CData* rresp;
@ -573,21 +590,22 @@ private:
Vvortex_afu_shim* device_; Vvortex_afu_shim* device_;
RAM* ram_; RAM* ram_;
DramSim dram_sim_; DramSim dram_sim_;
uint64_t mem_bank_size_;
std::future<void> future_; std::future<void> future_;
bool stop_; bool stop_;
std::mutex mutex_; std::mutex mutex_;
std::list<mem_req_t*> pending_mem_reqs_[M_AXI_MEM_NUM_BANKS]; std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS]; m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS];
MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS]; MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS];
m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS]; m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS];
std::queue<mem_req_t*> dram_queues_[M_AXI_MEM_NUM_BANKS]; std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_BANKS];
#ifdef VCD_OUTPUT #ifdef VCD_OUTPUT
VerilatedVcdC* tfp_; VerilatedVcdC* tfp_;