mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
ramulator memory addressing bug fix + platform memory refactoring
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
e80ee2c819
commit
22398c991d
33 changed files with 310 additions and 281 deletions
|
@ -301,11 +301,11 @@ config2()
|
|||
|
||||
# test single-bank memory
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
else
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||
fi
|
||||
|
||||
# test larger memory address
|
||||
|
@ -322,10 +322,10 @@ config2()
|
|||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test memory ports
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
|
||||
|
||||
|
|
|
@ -172,8 +172,26 @@
|
|||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 2
|
||||
// Platform memory parameters
|
||||
|
||||
`ifndef PLATFORM_MEMORY_NUM_BANKS
|
||||
`define PLATFORM_MEMORY_NUM_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`ifdef XLEN_64
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 48
|
||||
`else
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 32
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_DATA_SIZE
|
||||
`define PLATFORM_MEMORY_DATA_SIZE 64
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
||||
`define PLATFORM_MEMORY_INTERLEAVE 1
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
@ -656,9 +674,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L1_MEM_PORTS
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`else
|
||||
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
@ -735,9 +753,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L2_MEM_PORTS
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`else
|
||||
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
@ -796,9 +814,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L3_MEM_PORTS
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`else
|
||||
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
|
|
@ -193,7 +193,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
|
||||
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
||||
.NUM_BANKS_OUT (AXI_NUM_BANKS),
|
||||
.INTERLEAVE (0),
|
||||
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
|
||||
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||
) axi_adapter (
|
||||
|
|
|
@ -28,18 +28,18 @@
|
|||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
//`include "platform_afu_top_config.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8))
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH ((`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_NUM_BANKS)) - $clog2(`PLATFORM_MEMORY_DATA_SIZE))
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH (`PLATFORM_MEMORY_DATA_SIZE * 8)
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
|
||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
|
||||
`endif
|
||||
|
||||
package local_mem_cfg_pkg;
|
||||
|
|
|
@ -11,18 +11,14 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef NOPAE
|
||||
`include "afu_json_info.vh"
|
||||
`else
|
||||
`include "vortex_afu.vh"
|
||||
`endif
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
||||
`define PLATFORM_MEMORY_INTERLEAVE 1
|
||||
`endif
|
||||
|
||||
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
|
|
|
@ -134,10 +134,12 @@ module VX_afu_ctrl #(
|
|||
RSTATE_RESP = 2'd2,
|
||||
RSTATE_WIDTH = 2;
|
||||
|
||||
localparam MEMORY_BANK_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - `CLOG2(`PLATFORM_MEMORY_NUM_BANKS);
|
||||
|
||||
// device caps
|
||||
wire [63:0] dev_caps = {8'b0,
|
||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
||||
5'(MEMORY_BANK_ADDR_WIDTH-20),
|
||||
3'(`CLOG2(`PLATFORM_MEMORY_NUM_BANKS)),
|
||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||
8'(`NUM_WARPS),
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_afu_wrap #(
|
|||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -58,11 +58,7 @@ module VX_afu_wrap #(
|
|||
|
||||
output wire interrupt
|
||||
);
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
|
||||
`else
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
|
||||
`endif
|
||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - $clog2(C_M_AXI_MEM_NUM_BANKS);
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
STATE_IDLE = 0,
|
||||
|
@ -71,8 +67,8 @@ module VX_afu_wrap #(
|
|||
STATE_DONE = 3
|
||||
} state_e;
|
||||
|
||||
localparam PENDING_SIZEW = 12; // max outstanding requests size
|
||||
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
||||
localparam PENDING_WR_SIZEW = 12; // max outstanding requests size
|
||||
localparam NUM_MEM_BANKS_SIZEW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
||||
|
||||
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
@ -108,11 +104,11 @@ module VX_afu_wrap #(
|
|||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||
`endif
|
||||
|
||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
reg [PENDING_SIZEW-1:0] vx_pending_writes;
|
||||
reg [PENDING_WR_SIZEW-1:0] vx_pending_writes;
|
||||
reg vx_reset = 1; // asserted at initialization
|
||||
wire vx_busy;
|
||||
|
||||
|
@ -200,7 +196,7 @@ module VX_afu_wrap #(
|
|||
end
|
||||
|
||||
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
|
||||
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
|
||||
wire [NUM_MEM_BANKS_SIZEW-1:0] cur_wr_reqs, cur_wr_rsps;
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_m_axi_wr_req_fire
|
||||
VX_axi_write_ack axi_write_ack (
|
||||
|
@ -224,14 +220,14 @@ module VX_afu_wrap #(
|
|||
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
|
||||
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
|
||||
|
||||
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
|
||||
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
|
||||
wire signed [NUM_MEM_BANKS_SIZEW:0] reqs_sub = (NUM_MEM_BANKS_SIZEW+1)'(cur_wr_reqs) -
|
||||
(NUM_MEM_BANKS_SIZEW+1)'(cur_wr_rsps);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
vx_pending_writes <= '0;
|
||||
end else begin
|
||||
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
|
||||
vx_pending_writes <= vx_pending_writes + PENDING_WR_SIZEW'(reqs_sub);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -270,7 +266,7 @@ module VX_afu_wrap #(
|
|||
.ap_ready (ap_ready),
|
||||
.ap_idle (ap_idle),
|
||||
.interrupt (interrupt),
|
||||
|
||||
|
||||
.ap_ctrl_read (ap_ctrl_read),
|
||||
|
||||
`ifdef SCOPE
|
||||
|
|
|
@ -17,12 +17,12 @@ module vortex_afu #(
|
|||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = 1
|
||||
`else
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS
|
||||
`endif
|
||||
) (
|
||||
// System signals
|
||||
|
@ -33,7 +33,7 @@ module vortex_afu #(
|
|||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`endif
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
|
@ -75,7 +75,7 @@ module vortex_afu #(
|
|||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||
`REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`else
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`endif
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
|
@ -94,7 +94,7 @@ module vortex_afu #(
|
|||
.s_axi_ctrl_rready (s_axi_ctrl_rready),
|
||||
.s_axi_ctrl_rdata (s_axi_ctrl_rdata),
|
||||
.s_axi_ctrl_rresp (s_axi_ctrl_rresp),
|
||||
|
||||
|
||||
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
|
||||
.s_axi_ctrl_bready (s_axi_ctrl_bready),
|
||||
.s_axi_ctrl_bresp (s_axi_ctrl_bresp),
|
||||
|
|
|
@ -14,18 +14,6 @@
|
|||
`ifndef VORTEX_AFU_VH
|
||||
`define VORTEX_AFU_VH
|
||||
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||
`define PLATFORM_MEMORY_ADDR_WIDTH 31
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_DATA_WIDTH
|
||||
`define PLATFORM_MEMORY_DATA_WIDTH 512
|
||||
`endif
|
||||
|
||||
`ifndef PLATFORM_MEMORY_OFFSET
|
||||
`define PLATFORM_MEMORY_OFFSET 0
|
||||
`endif
|
||||
|
|
|
@ -221,7 +221,7 @@ module VX_async_ram_patch #(
|
|||
VX_placeholder #(
|
||||
.O (1)
|
||||
) placeholder2 (
|
||||
.in (),
|
||||
.in (1'b0),
|
||||
.out (is_raddr_reg)
|
||||
);
|
||||
wire [DATAW-1:0] rdata_a;
|
||||
|
|
|
@ -280,7 +280,13 @@ module VX_axi_adapter #(
|
|||
end
|
||||
|
||||
assign m_axi_arvalid[i] = req_xbar_valid_out[i] && ~xbar_rw_out;
|
||||
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(xbar_addr_out) << LOG2_DATA_SIZE;
|
||||
|
||||
// convert address to byte-addressable space
|
||||
if (INTERLEAVE) begin : g_m_axi_araddr_i
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH_OUT'(xbar_addr_out) << (BANK_SEL_BITS + LOG2_DATA_SIZE)) | (ADDR_WIDTH_OUT'(i) << LOG2_DATA_SIZE);
|
||||
end else begin : g_m_axi_araddr_ni
|
||||
assign m_axi_araddr[i] = (ADDR_WIDTH_OUT'(xbar_addr_out) << LOG2_DATA_SIZE) | (ADDR_WIDTH_OUT'(i) << (BANK_ADDR_WIDTH + LOG2_DATA_SIZE));
|
||||
end
|
||||
assign m_axi_arid[i] = TAG_WIDTH_OUT'(xbar_tag_r_out);
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
assign m_axi_arsize[i] = 3'(LOG2_DATA_SIZE);
|
||||
|
|
|
@ -7,22 +7,6 @@ include ../../common.mk
|
|||
# AFU parameters
|
||||
CONFIGS += -DNOPAE
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
#CONFIGS += -DNUM_CORES=2
|
||||
#CONFIGS += -DNUM_WARPS=32
|
||||
|
|
|
@ -99,7 +99,7 @@ ifdef PERF
|
|||
endif
|
||||
|
||||
# ast dump flags
|
||||
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
|
||||
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_NUM_BANKS=1 -DNOPAE -DSV_DPI
|
||||
|
||||
all: swconfig ip-gen setup build
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ foreach def $vdefines_list {
|
|||
if { $name == "CHIPSCOPE" } {
|
||||
set chipscope 1
|
||||
}
|
||||
if { $name == "PLATFORM_MEMORY_BANKS" } {
|
||||
if { $name == "PLATFORM_MEMORY_NUM_BANKS" } {
|
||||
set num_banks [lindex $fields 1]
|
||||
}
|
||||
if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } {
|
||||
|
|
|
@ -5,31 +5,36 @@ CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
|||
|
||||
ifeq ($(DEV_ARCH), zynquplus)
|
||||
# zynquplus
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
else ifeq ($(DEV_ARCH), versal)
|
||||
# versal
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
ifneq ($(findstring xilinx_vck5000,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000
|
||||
endif
|
||||
else
|
||||
# alveo
|
||||
ifneq ($(findstring xilinx_u55c,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=29
|
||||
# 16 GB of HBM2 with 32 channels (512 MB per channel)
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
||||
CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE
|
||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||
#VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)])
|
||||
else ifneq ($(findstring xilinx_u50,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28
|
||||
# 8 GB of HBM2 with 32 channels (256 MB per channel)
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=33
|
||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||
else ifneq ($(findstring xilinx_u280,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28
|
||||
# 8 GB of HBM2 with 32 channels (256 MB per channel)
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=33
|
||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||
else ifneq ($(findstring xilinx_u250,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
||||
# 64 GB of DDR4 with 4 channels (16 GB per channel)
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=36
|
||||
else ifneq ($(findstring xilinx_u200,$(XSA)),)
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
||||
# 64 GB of DDR4 with 4 channels (16 GB per channel)
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=36
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -78,10 +78,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_NUM_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_NUM_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
|
|
@ -113,10 +113,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_NUM_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_NUM_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
|
|
@ -29,19 +29,54 @@ using namespace vortex;
|
|||
|
||||
class DramSim::Impl {
|
||||
private:
|
||||
struct mem_req_t {
|
||||
uint64_t addr;
|
||||
bool is_write;
|
||||
ResponseCallback callback;
|
||||
void* arg;
|
||||
};
|
||||
|
||||
Ramulator::IFrontEnd* ramulator_frontend_;
|
||||
Ramulator::IMemorySystem* ramulator_memorysystem_;
|
||||
uint32_t cpu_channel_size_;
|
||||
uint64_t cpu_cycles_;
|
||||
uint32_t scaled_dram_cycles_;
|
||||
static const uint32_t tick_cycles_ = 1000;
|
||||
static const uint32_t dram_channel_size_ = 16; // 128 bits
|
||||
std::queue<mem_req_t> pending_reqs_;
|
||||
|
||||
void handle_pending_requests() {
|
||||
if (pending_reqs_.empty())
|
||||
return;
|
||||
auto& req = pending_reqs_.front();
|
||||
auto req_type = req.is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read;
|
||||
std::function<void(Ramulator::Request&)> callback = nullptr;
|
||||
if (req.callback) {
|
||||
callback = [req_callback = std::move(req.callback), req_arg = std::move(req.arg)](Ramulator::Request& /*dram_req*/) {
|
||||
req_callback(req_arg);
|
||||
};
|
||||
}
|
||||
if (ramulator_frontend_->receive_external_requests(req_type, req.addr, 0, callback)) {
|
||||
if (req.is_write) {
|
||||
// Ramulator does not handle write responses, so we fire the callback ourselves.
|
||||
if (req.callback) {
|
||||
req.callback(req.arg);
|
||||
}
|
||||
}
|
||||
pending_reqs_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
Impl(int clock_ratio) {
|
||||
Impl(uint32_t num_channels, uint32_t channel_size, float clock_ratio) {
|
||||
YAML::Node dram_config;
|
||||
dram_config["Frontend"]["impl"] = "GEM5";
|
||||
dram_config["MemorySystem"]["impl"] = "GenericDRAM";
|
||||
dram_config["MemorySystem"]["clock_ratio"] = clock_ratio;
|
||||
dram_config["MemorySystem"]["clock_ratio"] = 1;
|
||||
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
|
||||
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = num_channels;
|
||||
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
||||
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
||||
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
||||
|
@ -59,6 +94,10 @@ public:
|
|||
ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config);
|
||||
ramulator_frontend_->connect_memory_system(ramulator_memorysystem_);
|
||||
ramulator_memorysystem_->connect_frontend(ramulator_frontend_);
|
||||
|
||||
cpu_channel_size_ = channel_size;
|
||||
scaled_dram_cycles_ = static_cast<uint64_t>(clock_ratio * tick_cycles_);
|
||||
this->reset();
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
|
@ -66,41 +105,49 @@ public:
|
|||
auto original_buf = std::cout.rdbuf();
|
||||
std::cout.rdbuf(nullstream.rdbuf());
|
||||
ramulator_frontend_->finalize();
|
||||
ramulator_memorysystem_->finalize();
|
||||
ramulator_memorysystem_->finalize();
|
||||
std::cout.rdbuf(original_buf);
|
||||
}
|
||||
|
||||
void reset() {
|
||||
//--
|
||||
cpu_cycles_ = 0;
|
||||
}
|
||||
|
||||
void tick() {
|
||||
ramulator_memorysystem_->tick();
|
||||
cpu_cycles_ += tick_cycles_;
|
||||
while (cpu_cycles_ >= scaled_dram_cycles_) {
|
||||
this->handle_pending_requests();
|
||||
ramulator_memorysystem_->tick();
|
||||
cpu_cycles_ -= scaled_dram_cycles_;
|
||||
}
|
||||
}
|
||||
|
||||
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg) {
|
||||
if (!ramulator_frontend_->receive_external_requests(
|
||||
is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read,
|
||||
addr,
|
||||
source_id,
|
||||
[callback_ = std::move(response_cb), arg_ = std::move(arg)](Ramulator::Request& /*dram_req*/) {
|
||||
callback_(arg_);
|
||||
void send_request(uint64_t addr, bool is_write, ResponseCallback response_cb, void* arg) {
|
||||
// enqueue the request
|
||||
if (cpu_channel_size_ > dram_channel_size_) {
|
||||
uint32_t n = cpu_channel_size_ / dram_channel_size_;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
uint64_t dram_byte_addr = (addr / cpu_channel_size_) * dram_channel_size_ + (i * dram_channel_size_);
|
||||
if (i == 0) {
|
||||
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||
} else {
|
||||
pending_reqs_.push({dram_byte_addr, is_write, nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
)) {
|
||||
return false;
|
||||
} else if (cpu_channel_size_ < dram_channel_size_) {
|
||||
uint64_t dram_byte_addr = (addr / cpu_channel_size_) * dram_channel_size_;
|
||||
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||
} else {
|
||||
uint64_t dram_byte_addr = addr;
|
||||
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||
}
|
||||
if (is_write) {
|
||||
// Ramulator does not handle write responses, so we call the callback ourselves
|
||||
response_cb(arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
DramSim::DramSim(int clock_ratio)
|
||||
: impl_(new Impl(clock_ratio))
|
||||
DramSim::DramSim(uint32_t num_channels, uint32_t channel_size, float clock_ratio)
|
||||
: impl_(new Impl(num_channels, channel_size, clock_ratio))
|
||||
{}
|
||||
|
||||
DramSim::~DramSim() {
|
||||
|
@ -115,6 +162,6 @@ void DramSim::tick() {
|
|||
impl_->tick();
|
||||
}
|
||||
|
||||
bool DramSim::send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
|
||||
return impl_->send_request(is_write, addr, source_id, callback, arg);
|
||||
void DramSim::send_request(uint64_t addr, bool is_write, ResponseCallback callback, void* arg) {
|
||||
impl_->send_request(addr, is_write, callback, arg);
|
||||
}
|
|
@ -19,14 +19,15 @@ class DramSim {
|
|||
public:
|
||||
typedef void (*ResponseCallback)(void *arg);
|
||||
|
||||
DramSim(int clock_ratio);
|
||||
DramSim(uint32_t num_channels, uint32_t channel_size, float clock_ratio);
|
||||
~DramSim();
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg);
|
||||
// addr: per-channel block address
|
||||
void send_request(uint64_t addr, bool is_write, ResponseCallback response_cb, void* arg);
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
|
|
|
@ -31,24 +31,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
|||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
|
||||
# Platform parameters
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
|
|
|
@ -35,8 +35,6 @@
|
|||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
@ -66,6 +64,8 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
static uint32_t g_mem_bank_addr_width = (PLATFORM_MEMORY_ADDR_WIDTH - log2ceil(PLATFORM_MEMORY_NUM_BANKS));
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
|
@ -95,7 +95,7 @@ public:
|
|||
Impl()
|
||||
: device_(nullptr)
|
||||
, ram_(nullptr)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
, dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO)
|
||||
, stop_(false)
|
||||
, host_buffer_ids_(0)
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -146,9 +146,6 @@ public:
|
|||
// allocate RAM
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// calculate memory bank size
|
||||
mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH;
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
@ -274,16 +271,15 @@ private:
|
|||
|
||||
if (!dram_queue_.empty()) {
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, mem_req->bank_id, [](void* arg) {
|
||||
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}, mem_req);
|
||||
dram_queue_.pop();
|
||||
}
|
||||
|
||||
dram_sim_.tick();
|
||||
|
@ -407,14 +403,14 @@ private:
|
|||
}
|
||||
|
||||
void avs_bus_reset() {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
device_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void avs_bus_eval() {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
// process memory responses
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
|
@ -430,7 +426,12 @@ private:
|
|||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
#if PLATFORM_MEMORY_INTERLEAVE == 1
|
||||
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_NUM_BANKS + b) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
#else
|
||||
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) + (b << g_mem_bank_addr_width)) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
#endif
|
||||
|
||||
if (device_->avs_write[b]) {
|
||||
// process write request
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
|
@ -515,9 +516,8 @@ private:
|
|||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
uint64_t host_buffer_ids_;
|
||||
uint64_t mem_bank_size_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
|
|
@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
|||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_MEMORY_BANKS]
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
output logic avs_write [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
output logic avs_read [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_NUM_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_MEMORY_NUM_BANKS]
|
||||
);
|
||||
|
||||
t_if_ccip_Rx cp2af_sRxPort;
|
||||
t_if_ccip_Tx af2cp_sTxPort;
|
||||
|
||||
vortex_afu #(
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS)
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_NUM_BANKS)
|
||||
) afu (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
|
|
@ -24,21 +24,6 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
|||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
# Platform parameters
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
|
|
@ -35,8 +35,6 @@
|
|||
#include <dram_sim.h>
|
||||
#include <util.h>
|
||||
|
||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
@ -66,6 +64,8 @@ typedef uint64_t Word;
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
static uint32_t g_mem_bank_addr_width = (PLATFORM_MEMORY_ADDR_WIDTH - log2ceil(PLATFORM_MEMORY_NUM_BANKS));
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
|
@ -93,7 +93,7 @@ void sim_trace_enable(bool enable) {
|
|||
|
||||
class Processor::Impl {
|
||||
public:
|
||||
Impl() : dram_sim_(MEM_CLOCK_RATIO) {
|
||||
Impl() : dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO) {
|
||||
// force random values for uninitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
@ -154,7 +154,7 @@ public:
|
|||
|
||||
// start
|
||||
device_->reset = 0;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
device_->mem_req_ready[b] = 1;
|
||||
}
|
||||
|
||||
|
@ -195,7 +195,7 @@ private:
|
|||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_[b], empty);
|
||||
}
|
||||
|
@ -224,17 +224,15 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
if (!dram_queue_[b].empty()) {
|
||||
auto mem_req = dram_queue_[b].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
||||
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||
// mark completed request as ready
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
orig_req->ready = true;
|
||||
}, mem_req)) {
|
||||
// was successfully sent to dram, remove from queue
|
||||
dram_queue_[b].pop();
|
||||
}
|
||||
}, mem_req);
|
||||
dram_queue_[b].pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -254,7 +252,7 @@ private:
|
|||
}
|
||||
|
||||
void mem_bus_reset() {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
device_->mem_req_ready[b] = 0;
|
||||
device_->mem_rsp_valid[b] = 0;
|
||||
}
|
||||
|
@ -262,13 +260,13 @@ private:
|
|||
|
||||
void mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
mem_rd_rsp_ready_[b] = device_->mem_rsp_ready[b];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
// process memory responses
|
||||
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
|
||||
device_->mem_rsp_valid[b] = 0;
|
||||
|
@ -293,11 +291,16 @@ private:
|
|||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr[b] * PLATFORM_MEMORY_DATA_SIZE);
|
||||
#if PLATFORM_MEMORY_INTERLEAVE == 1
|
||||
uint64_t byte_addr = (uint64_t(device_->mem_req_addr[b]) * PLATFORM_MEMORY_NUM_BANKS + b) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
#else
|
||||
uint64_t byte_addr = (uint64_t(device_->mem_req_addr[b]) + (b << g_mem_bank_addr_width)) * PLATFORM_MEMORY_DATA_SIZE;
|
||||
#endif
|
||||
// check read/write
|
||||
if (device_->mem_req_rw[b]) {
|
||||
auto byteen = device_->mem_req_byteen[b];
|
||||
auto data = VDataCast<uint8_t*, PLATFORM_MEMORY_DATA_SIZE>::get(device_->mem_req_data[b]);
|
||||
// check address range
|
||||
// check if console output address
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
|
@ -313,21 +316,23 @@ private:
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
// process memory writes
|
||||
/*printf("%0ld: [sim] MEM Wr Req[%d]: addr=0x%0lx, tag=0x%0lx, byteen=0x", timestamp, b, byte_addr, device_->mem_req_tag[b]);
|
||||
for (int i = (PLATFORM_MEMORY_DATA_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
printf("%02x", data[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[b];
|
||||
mem_req->addr = byte_addr;
|
||||
|
@ -341,7 +346,7 @@ private:
|
|||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
// process memory reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[b];
|
||||
mem_req->addr = byte_addr;
|
||||
|
@ -388,11 +393,11 @@ private:
|
|||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_BANKS];
|
||||
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
std::array<bool, PLATFORM_MEMORY_BANKS> mem_rd_rsp_ready_;
|
||||
std::array<bool, PLATFORM_MEMORY_NUM_BANKS> mem_rd_rsp_ready_;
|
||||
|
||||
DramSim dram_sim_;
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module rtlsim_shim import VX_gpu_pkg::*; #(
|
||||
parameter MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH,
|
||||
parameter MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS,
|
||||
parameter MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||
parameter MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_NUM_BANKS),
|
||||
parameter MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS,
|
||||
parameter MEM_TAG_WIDTH = 64
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
@ -159,7 +159,7 @@ module rtlsim_shim import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH_OUT (MEM_TAG_WIDTH),
|
||||
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
||||
.NUM_BANKS_OUT (MEM_NUM_BANKS),
|
||||
.INTERLEAVE (0),
|
||||
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
|
||||
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
||||
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || MEM_NUM_BANKS > 1) ? 2 : 0)
|
||||
) mem_bank_adapter (
|
||||
|
|
|
@ -43,8 +43,13 @@ public:
|
|||
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
uint32_t wsel_bits = log2ceil(config_.line_size);
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::Priority, config.num_reqs, (1 << config.B), wsel_bits);
|
||||
uint32_t lg2_line_size = log2ceil(config_.line_size);
|
||||
uint32_t num_banks = 1 << config.B;
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::Priority, config.num_reqs, num_banks, 1,
|
||||
[lg2_line_size, num_banks](const MemCrossBar::ReqType& req) {
|
||||
// Custom logic to calculate the output index using bank interleaving
|
||||
return (uint32_t)((req.addr >> lg2_line_size) & (num_banks-1));
|
||||
});
|
||||
for (uint32_t i = 0; i < config.num_reqs; ++i) {
|
||||
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
|
||||
|
|
|
@ -30,7 +30,6 @@ private:
|
|||
MemCrossBar::Ptr mem_xbar_;
|
||||
DramSim dram_sim_;
|
||||
mutable PerfStats perf_stats_;
|
||||
|
||||
struct DramCallbackArgs {
|
||||
MemSim::Impl* memsim;
|
||||
MemReq request;
|
||||
|
@ -41,11 +40,15 @@ public:
|
|||
Impl(MemSim* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
, dram_sim_(config.num_banks, config.block_size, config.clock_ratio)
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks, 1,
|
||||
[lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
|
||||
// Custom logic to calculate the output index using bank interleaving
|
||||
return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1));
|
||||
});
|
||||
for (uint32_t i = 0; i < config.num_ports; ++i) {
|
||||
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
|
||||
|
@ -74,16 +77,15 @@ public:
|
|||
|
||||
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
|
||||
|
||||
// try to enqueue the request to the memory system
|
||||
// enqueue the request to the memory system
|
||||
auto req_args = new DramCallbackArgs{this, mem_req, i};
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
dram_sim_.send_request(
|
||||
mem_req.addr,
|
||||
0,
|
||||
mem_req.write,
|
||||
[](void* arg) {
|
||||
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
||||
// only send a response for read requests
|
||||
if (!rsp_args->request.write) {
|
||||
// only send a response for read requests
|
||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp[" << rsp_args->bank_id << "]: " << mem_rsp);
|
||||
|
@ -93,14 +95,7 @@ public:
|
|||
req_args
|
||||
);
|
||||
|
||||
// check if the request was enqueued successfully
|
||||
if (!enqueue_success) {
|
||||
delete req_args;
|
||||
continue;
|
||||
}
|
||||
|
||||
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);
|
||||
|
||||
mem_xbar_->ReqOut.at(i).pop();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ public:
|
|||
struct Config {
|
||||
uint32_t num_banks;
|
||||
uint32_t num_ports;
|
||||
uint32_t block_size;
|
||||
float clock_ratio;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
|
|
|
@ -22,10 +22,14 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
{
|
||||
SimPlatform::instance().initialize();
|
||||
|
||||
assert(PLATFORM_MEMORY_DATA_SIZE == MEM_BLOCK_SIZE);
|
||||
|
||||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
PLATFORM_MEMORY_BANKS,
|
||||
L3_MEM_PORTS
|
||||
PLATFORM_MEMORY_NUM_BANKS,
|
||||
L3_MEM_PORTS,
|
||||
MEM_BLOCK_SIZE,
|
||||
MEM_CLOCK_RATIO
|
||||
});
|
||||
|
||||
// create clusters
|
||||
|
|
|
@ -398,6 +398,8 @@ inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
|||
template <typename T>
|
||||
class HashTable {
|
||||
public:
|
||||
typedef T DataType;
|
||||
|
||||
HashTable(uint32_t capacity)
|
||||
: entries_(capacity)
|
||||
, size_(0)
|
||||
|
@ -470,6 +472,8 @@ private:
|
|||
template <typename Type>
|
||||
class Arbiter : public SimObject<Arbiter<Type>> {
|
||||
public:
|
||||
typedef Type ReqType;
|
||||
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
|
@ -556,6 +560,8 @@ protected:
|
|||
template <typename Type>
|
||||
class CrossBar : public SimObject<CrossBar<Type>> {
|
||||
public:
|
||||
typedef Type ReqType;
|
||||
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
|
@ -565,8 +571,8 @@ public:
|
|||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
uint32_t delay = 1,
|
||||
std::function<uint32_t(const Type& req)> output_sel = nullptr
|
||||
)
|
||||
: SimObject<CrossBar<Type>>(ctx, name)
|
||||
, Inputs(num_inputs, this)
|
||||
|
@ -576,12 +582,18 @@ public:
|
|||
, grants_(num_outputs, 0)
|
||||
, lg2_inputs_(log2ceil(num_inputs))
|
||||
, lg2_outputs_(log2ceil(num_outputs))
|
||||
, addr_start_(addr_start)
|
||||
, collisions_(0) {
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_outputs));
|
||||
if (output_sel != nullptr) {
|
||||
output_sel_ = output_sel;
|
||||
} else {
|
||||
output_sel_ = [this](const Type& req) {
|
||||
return (uint32_t)bit_getw(req.addr, 0, (lg2_outputs_-1));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
|
@ -609,7 +621,8 @@ public:
|
|||
auto& req = req_in.front();
|
||||
uint32_t output_idx = 0;
|
||||
if (lg2_outputs_ != 0) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_start_ + (lg2_outputs_-1));
|
||||
// select output index
|
||||
output_idx = output_sel_(req);
|
||||
// skip if input is not going to current output
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
|
@ -649,7 +662,7 @@ protected:
|
|||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg2_inputs_;
|
||||
uint32_t lg2_outputs_;
|
||||
uint32_t addr_start_;
|
||||
std::function<uint32_t(const Type& req)> output_sel_;
|
||||
uint64_t collisions_;
|
||||
};
|
||||
|
||||
|
@ -658,6 +671,9 @@ protected:
|
|||
template <typename Req, typename Rsp>
|
||||
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
||||
public:
|
||||
typedef Req ReqType;
|
||||
typedef Rsp RspType;
|
||||
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
|
@ -771,6 +787,9 @@ protected:
|
|||
template <typename Req, typename Rsp>
|
||||
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
|
||||
public:
|
||||
typedef Req ReqType;
|
||||
typedef Rsp RspType;
|
||||
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
|
@ -783,8 +802,8 @@ public:
|
|||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
uint32_t delay = 1,
|
||||
std::function<uint32_t(const Req& req)> output_sel = nullptr
|
||||
)
|
||||
: SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
|
@ -797,7 +816,6 @@ public:
|
|||
, rsp_grants_(num_inputs, 0)
|
||||
, lg2_inputs_(log2ceil(num_inputs))
|
||||
, lg2_outputs_(log2ceil(num_outputs))
|
||||
, addr_start_(addr_start)
|
||||
, req_collisions_(0)
|
||||
, rsp_collisions_(0) {
|
||||
assert(delay != 0);
|
||||
|
@ -805,6 +823,13 @@ public:
|
|||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_inputs));
|
||||
assert(ispow2(num_outputs));
|
||||
if (output_sel != nullptr) {
|
||||
output_sel_ = output_sel;
|
||||
} else {
|
||||
output_sel_ = [this](const Req& req) {
|
||||
return (uint32_t)bit_getw(req.addr, 0, (lg2_outputs_-1));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
|
@ -875,7 +900,8 @@ public:
|
|||
auto& req = req_in.front();
|
||||
uint32_t output_idx = 0;
|
||||
if (lg2_outputs_ != 0) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_start_ + (lg2_outputs_-1));
|
||||
// select output index
|
||||
output_idx = output_sel_(req);
|
||||
// skip if request is not going to current output
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
|
@ -929,7 +955,7 @@ protected:
|
|||
std::vector<uint32_t> rsp_grants_;
|
||||
uint32_t lg2_inputs_;
|
||||
uint32_t lg2_outputs_;
|
||||
uint32_t addr_start_;
|
||||
std::function<uint32_t(const Req& req)> output_sel_;
|
||||
uint64_t req_collisions_;
|
||||
uint64_t rsp_collisions_;
|
||||
};
|
||||
|
|
|
@ -31,21 +31,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
|||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
|
||||
# Platform parameters
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
ifeq ($(XLEN),64)
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
||||
else
|
||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
||||
endif
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
|
|
|
@ -17,16 +17,16 @@ module vortex_afu_shim #(
|
|||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
||||
parameter C_M_AXI_MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS
|
||||
) (
|
||||
// System signals
|
||||
input wire ap_clk,
|
||||
input wire ap_rst_n,
|
||||
|
||||
// AXI4 master interface
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||
|
||||
// AXI4-Lite slave interface
|
||||
input wire s_axi_ctrl_awvalid,
|
||||
|
@ -61,7 +61,7 @@ module vortex_afu_shim #(
|
|||
.clk (ap_clk),
|
||||
.reset (~ap_rst_n),
|
||||
|
||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||
|
||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||
|
|
|
@ -37,8 +37,6 @@
|
|||
|
||||
#include <iostream>
|
||||
|
||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
@ -61,10 +59,10 @@
|
|||
|
||||
#define CPU_GPU_LATENCY 200
|
||||
|
||||
#if PLATFORM_MEMORY_DATA_WIDTH > 64
|
||||
typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t;
|
||||
#if PLATFORM_MEMORY_DATA_SIZE > 8
|
||||
typedef VlWide<(PLATFORM_MEMORY_DATA_SIZE/4)> Vl_m_data_t;
|
||||
#else
|
||||
#if PLATFORM_MEMORY_DATA_WIDTH > 32
|
||||
#if PLATFORM_MEMORY_DATA_SIZE > 4
|
||||
typedef QData Vl_m_data_t;
|
||||
#else
|
||||
typedef IData Vl_m_data_t;
|
||||
|
@ -130,7 +128,7 @@ public:
|
|||
Impl()
|
||||
: device_(nullptr)
|
||||
, ram_(nullptr)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
, dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO)
|
||||
, stop_(false)
|
||||
#ifdef VCD_OUTPUT
|
||||
, tfp_(nullptr)
|
||||
|
@ -142,7 +140,7 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
delete mem_alloc_[b];
|
||||
}
|
||||
if (ram_) {
|
||||
|
@ -178,16 +176,16 @@ public:
|
|||
#endif
|
||||
|
||||
// calculate memory bank size
|
||||
mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH;
|
||||
mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_NUM_BANKS;
|
||||
|
||||
// allocate RAM
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// initialize AXI memory interfaces
|
||||
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
|
||||
MP_M_AXI_MEM(PLATFORM_MEMORY_NUM_BANKS);
|
||||
|
||||
// initialize memory allocator
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
}
|
||||
|
||||
|
@ -209,13 +207,13 @@ public:
|
|||
}
|
||||
|
||||
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||
return -1;
|
||||
return mem_alloc_[bank_id]->allocate(size, addr);
|
||||
}
|
||||
|
||||
int mem_free(uint32_t bank_id, uint64_t addr) {
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||
return -1;
|
||||
return mem_alloc_[bank_id]->release(addr);
|
||||
}
|
||||
|
@ -223,7 +221,7 @@ public:
|
|||
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||
return -1;
|
||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||
ram_->write(data, base_addr, size);
|
||||
|
@ -238,7 +236,7 @@ public:
|
|||
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
||||
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||
return -1;
|
||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||
ram_->read(data, base_addr, size);
|
||||
|
@ -321,7 +319,7 @@ private:
|
|||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queues_[b], empty);
|
||||
}
|
||||
|
@ -338,7 +336,7 @@ private:
|
|||
device_->ap_rst_n = 1;
|
||||
|
||||
// this AXI device is always ready to accept new requests
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
*m_axi_mem_[b].arready = 1;
|
||||
*m_axi_mem_[b].awready = 1;
|
||||
*m_axi_mem_[b].wready = 1;
|
||||
|
@ -358,19 +356,18 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
if (!dram_queues_[b].empty()) {
|
||||
auto mem_req = dram_queues_[b].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
||||
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queues_[b].pop();
|
||||
}
|
||||
}, mem_req);
|
||||
dram_queues_[b].pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -411,7 +408,7 @@ private:
|
|||
}
|
||||
|
||||
void axi_mem_bus_reset() {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
// read request address
|
||||
*m_axi_mem_[b].arready = 0;
|
||||
|
||||
|
@ -435,14 +432,14 @@ private:
|
|||
|
||||
void axi_mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready;
|
||||
m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||
// handle read responses
|
||||
if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) {
|
||||
*m_axi_mem_[b].rvalid = 0;
|
||||
|
@ -607,15 +604,15 @@ private:
|
|||
|
||||
std::mutex mutex_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS];
|
||||
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS];
|
||||
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS];
|
||||
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_BANKS];
|
||||
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_NUM_BANKS];
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC* tfp_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue