xrtsim implementation

This commit is contained in:
Blaise Tine 2024-09-19 04:24:20 -07:00
parent f0bff2a4a2
commit a37309c6b0
21 changed files with 940 additions and 406 deletions

View file

@ -92,10 +92,12 @@ regression()
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar"
# test temp driver mode for
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
@ -230,15 +232,18 @@ config2()
# test opaesim
./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=opae --app=diverge
./ci/blackbox.sh --driver=xrt --app=diverge
# disable DPI
if [ "$XLEN" == "64" ]; then
# need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
else
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
fi
# custom program startup address
@ -255,11 +260,9 @@ config2()
# disabling ZICOND extension
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
# test AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress
# test 128-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress
# test XLEN-bit MEM block
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
@ -299,10 +302,11 @@ debug()
test_csv_trace
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1"
echo "debugging tests done!"
}
@ -312,7 +316,7 @@ stress()
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache
echo "stress tests done!"
}

View file

@ -17,8 +17,8 @@ module vortex_afu #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH,
parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH
) (
// System signals
input wire ap_clk,

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,15 @@
`define VORTEX_AFU_VH
`ifndef M_AXI_MEM_NUM_BANKS
`define M_AXI_MEM_NUM_BANKS 1
`define M_AXI_MEM_NUM_BANKS 4
`endif
`ifndef M_AXI_MEM_ADDR_WIDTH
`define M_AXI_MEM_ADDR_WIDTH 30
`endif
`ifndef M_AXI_MEM_DATA_WIDTH
`define M_AXI_MEM_DATA_WIDTH 512
`endif
`ifndef M_AXI_MEM_ID_WIDTH

View file

@ -273,15 +273,15 @@ module VX_cache_bank #(
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
if (WRITE_ENABLE) begin : g_data_sel
if (WRITE_ENABLE) begin : g_data_sel_lo
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
end else begin : g_data_sel_ro
end else begin : g_data_sel_lo_ro
assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0];
`UNUSED_VAR (core_req_data)
`UNUSED_VAR (replay_data)
end
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel_hi
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
end

View file

@ -69,11 +69,9 @@ module VX_operands import VX_gpu_pkg::*; #(
wire pipe_valid_st2, pipe_ready_st2;
wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;
reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_m_st2;
wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2;
reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st2, src_data_m_st2;
reg [NUM_SRC_OPDS-1:0] data_fetched_n;
wire [NUM_SRC_OPDS-1:0] data_fetched_st1;
reg [NUM_SRC_OPDS-1:0] data_fetched_st1;
reg has_collision_n;
wire has_collision_st1;
@ -139,15 +137,6 @@ module VX_operands import VX_gpu_pkg::*; #(
wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in;
always @(*) begin
data_fetched_n = data_fetched_st1;
if (scoreboard_if.ready) begin
data_fetched_n = '0;
end else begin
data_fetched_n = data_fetched_st1 | req_fire_in;
end
end
assign pipe_data = {
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
@ -166,33 +155,37 @@ module VX_operands import VX_gpu_pkg::*; #(
wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;
VX_pipe_buffer #(
.DATAW (NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
.RESETW (NUM_SRC_OPDS)
.DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH))
) pipe_reg1 (
.clk (clk),
.reset (reset),
.valid_in (scoreboard_if.valid),
.ready_in (pipe_ready_in),
.data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
.data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}),
.data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
.data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}),
.valid_out(pipe_valid_st1),
.ready_out(pipe_ready_st1)
);
assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_m_st2;
always @(posedge clk) begin
if (reset || scoreboard_if.ready) begin
data_fetched_st1 <= 0;
end else begin
data_fetched_st1 <= data_fetched_st1 | req_fire_in;
end
end
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
VX_pipe_buffer #(
.DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
.RESETW (NUM_SRC_OPDS * REGS_DATAW)
.DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.valid_in (pipe_valid2_st1),
.ready_in (pipe_ready_st1),
.data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
.data_out ({src_data_st2, gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
.data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
.data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
.valid_out(pipe_valid_st2),
.ready_out(pipe_ready_st2)
);
@ -206,6 +199,14 @@ module VX_operands import VX_gpu_pkg::*; #(
end
end
always @(posedge clk) begin
if (reset || pipe_fire_st2) begin
src_data_st2 <= 0;
end else begin
src_data_st2 <= src_data_m_st2;
end
end
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),

View file

@ -94,49 +94,36 @@ module VX_axi_adapter #(
localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS);
wire [BANK_ADDRW-1:0] req_bank_sel;
if (NUM_BANKS > 1) begin : g_req_bank_sel
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin : g_req_bank_sel_0
assign req_bank_sel = '0;
end
wire mem_req_fire = mem_req_valid && mem_req_ready;
reg [NUM_BANKS-1:0] m_axi_aw_ack;
reg [NUM_BANKS-1:0] m_axi_w_ack;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
always @(posedge clk) begin
if (reset) begin
m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0;
end else begin
if (mem_req_fire && (req_bank_sel == i)) begin
m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0;
end else begin
if (m_axi_aw_fire)
m_axi_aw_ack[i] <= 1;
if (m_axi_w_fire)
m_axi_w_ack[i] <= 1;
end
end
end
end
wire axi_write_ready [NUM_BANKS];
wire [NUM_BANKS-1:0] axi_aw_ready, axi_write_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
&& (m_axi_wready[i] || m_axi_w_ack[i]);
assign axi_aw_ready[i] = m_axi_awready[i] || m_axi_aw_ack[i];
assign axi_write_ready[i] = m_axi_wready[i] && axi_aw_ready[i];
end
// request ack
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel];
reg [NUM_BANKS-1:0] m_axi_aw_ack;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w
always @(posedge clk) begin
if (reset) begin
m_axi_aw_ack[i] <= 0;
end else begin
if (m_axi_wvalid[i] && m_axi_wready[i]) begin
m_axi_aw_ack[i] <= 0;
end else if (m_axi_awvalid[i] && m_axi_awready[i]) begin
m_axi_aw_ack[i] <= 1;
end
end
end
end
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
@ -154,7 +141,7 @@ module VX_axi_adapter #(
// AXI write request data channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && axi_aw_ready[i];
assign m_axi_wdata[i] = mem_req_data;
assign m_axi_wstrb[i] = mem_req_byteen;
assign m_axi_wlast[i] = 1'b1;
@ -190,14 +177,13 @@ module VX_axi_adapter #(
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
`UNUSED_VAR (m_axi_rlast)
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
assign m_axi_rready[i] = rsp_arb_ready_in[i];
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time))
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time))
`UNUSED_VAR (m_axi_rlast[i])
end
VX_stream_arb #(

View file

@ -13,11 +13,12 @@
#pragma once
#include <assert.h>
#include <vortex.h>
#include <VX_config.h>
#include <VX_types.h>
#include <callbacks.h>
#include <malloc.h>
#include <mem_alloc.h>
#include <cstdint>
#include <unordered_map>

View file

@ -10,7 +10,7 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae
SRC_DIR := $(VORTEX_HOME)/runtime/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR)
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common
CXXFLAGS += -DXLEN_$(XLEN)
# Position independent code

View file

@ -18,15 +18,15 @@
#endif
// XRT includes
#ifndef XRTSIM
#ifdef XRTSIM
#include <xrt.h>
#else
#include "experimental/xrt_bo.h"
#include "experimental/xrt_device.h"
#include "experimental/xrt_error.h"
#include "experimental/xrt_ip.h"
#include "experimental/xrt_kernel.h"
#include "experimental/xrt_xclbin.h"
#else
#include <fpga.h>
#endif
#include <limits>
@ -66,7 +66,7 @@ struct platform_info_t {
};
static const platform_info_t g_platforms[] = {
{"vortex_xrtsim", 4, 16, 0x0}, // 16 x 64 KB = 1 MB
{"vortex_xrtsim", 0, 32, 0x0}, // 1 x 4 GB = 4 GB (2^0 banks of 2^32 bytes, same encoding as the entries below)
{"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4
{"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2
@ -258,7 +258,7 @@ public:
return -1;
});
#else
xrtKernelHandle xrtKernel = nullptr;
xrtKernelHandle xrtKernel = xrtDevice;
#endif
// get device name
@ -538,7 +538,6 @@ public:
return err;
});
#endif
DBGPRINT("*** write_register: addr=0x%x, value=0x%x\n", addr, value);
return 0;
}
@ -551,7 +550,6 @@ public:
return err;
});
#endif
DBGPRINT("*** read_register: addr=0x%x, value=0x%x\n", addr, *value);
return 0;
}

327
sim/common/mp_macros.h Normal file
View file

@ -0,0 +1,327 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// macro primitives
#define MP_COMMA ,
#define MP_REM(...) __VA_ARGS__
#define MP_EAT(...)
#define MP_STRINGIZE_(x) #x
#define MP_STRINGIZE(x) MP_STRINGIZE_(x)
#define MP_CONCAT_(x, ...) x ## __VA_ARGS__
#define MP_CONCAT(x, ...) MP_CONCAT_(x, __VA_ARGS__)
#define MP_COUNTOF(arr) (sizeof(arr) / sizeof(arr[0]))
// conditional macro
// MP_IIF(c) expands to MP_IIF_0 or MP_IIF_1 depending on c, which must expand to the token 0 or 1.
// The result is then applied to a pair: MP_IIF(1)(x, y) yields x, MP_IIF(0)(x, y) yields y.
#define MP_IIF_0(x, y) y
#define MP_IIF_1(x, y) x
#define MP_IIF(c) MP_CONCAT(MP_IIF_, c)
// MP_PAIR_FIRST / MP_PAIR_SECOND select the first / second of two arguments.
#define MP_PAIR_FIRST(a, b) a
#define MP_PAIR_SECOND(a, b) b
// pair macros
// These operate on an argument x written as a parenthesized group, optionally followed by more tokens.
// MP_PAIR(x): x = (a, b) expands to a, b (the parentheses are removed via MP_REM).
#define MP_PAIR(x) MP_REM x
// Helpers for MP_PAIR_L: MP_PAIR_PROBE_ re-wraps the leading group and appends a comma so that
// MP_PAIR_HEAD_ receives it as its own first argument and unwraps it.
#define MP_PAIR_HEAD_(x, ...) MP_PAIR(x)
#define MP_PAIR_PROBE_(...) (__VA_ARGS__),
#define MP_PAIR_L_(...) MP_PAIR_HEAD_(__VA_ARGS__)
// MP_PAIR_L(x): x = (L) R expands to L (the contents of the leading parenthesized group).
#define MP_PAIR_L(x) MP_PAIR_L_(MP_PAIR_PROBE_ x,)
// MP_PAIR_R(x): x = (L) R expands to R (the leading parenthesized group is consumed by MP_EAT).
#define MP_PAIR_R(x) MP_EAT x
// separator macros
// Each MP_SEP_* is a zero-argument function-like macro that expands to one separator token
// (MP_SEP_SPACE expands to nothing). They are meant to be passed by name as the 'sep' parameter of the
// MP_FOR_EACH / MP_REVERSE_FOR_EACH macros, which invoke sep() between consecutive elements.
#define MP_SEP_COMMA() ,
#define MP_SEP_SEMICOLON() ;
#define MP_SEP_PLUS() +
#define MP_SEP_AND() &
#define MP_SEP_OR() |
#define MP_SEP_COLON() :
#define MP_SEP_SPACE() /**/
#define MP_SEP_LESS() <
#define MP_SEP_GREATER() >
#define MP_SEP_ANDL() &&
#define MP_SEP_ORL() ||
// MAKE_UNIQUE macro
// Appends the current value of __COUNTER__ to x, producing a different identifier on each expansion.
// __COUNTER__ is a widely supported compiler extension (GCC, Clang, MSVC), not a standard C++ macro.
#define MP_MAKE_UNIQUE(x) MP_CONCAT(x, __COUNTER__)
// increment macro
// MP_INC(x) expands to the integer literal x + 1 for x in [0, 63], using the MP_INC_<n> lookup table below.
// The argument is routed through MP_INC_PASTE so that it is macro-expanded before token pasting; this
// makes nested or indirect uses such as MP_INC(MP_INC(0)) or MP_INC(SOME_CONSTANT_MACRO) work, in the
// same way MP_STRINGIZE and MP_CONCAT use a two-level definition.
#define MP_INC_PASTE(x) MP_INC_ ## x
#define MP_INC(x) MP_INC_PASTE(x)
#define MP_INC_0 1
#define MP_INC_1 2
#define MP_INC_2 3
#define MP_INC_3 4
#define MP_INC_4 5
#define MP_INC_5 6
#define MP_INC_6 7
#define MP_INC_7 8
#define MP_INC_8 9
#define MP_INC_9 10
#define MP_INC_10 11
#define MP_INC_11 12
#define MP_INC_12 13
#define MP_INC_13 14
#define MP_INC_14 15
#define MP_INC_15 16
#define MP_INC_16 17
#define MP_INC_17 18
#define MP_INC_18 19
#define MP_INC_19 20
#define MP_INC_20 21
#define MP_INC_21 22
#define MP_INC_22 23
#define MP_INC_23 24
#define MP_INC_24 25
#define MP_INC_25 26
#define MP_INC_26 27
#define MP_INC_27 28
#define MP_INC_28 29
#define MP_INC_29 30
#define MP_INC_30 31
#define MP_INC_31 32
#define MP_INC_32 33
#define MP_INC_33 34
#define MP_INC_34 35
#define MP_INC_35 36
#define MP_INC_36 37
#define MP_INC_37 38
#define MP_INC_38 39
#define MP_INC_39 40
#define MP_INC_40 41
#define MP_INC_41 42
#define MP_INC_42 43
#define MP_INC_43 44
#define MP_INC_44 45
#define MP_INC_45 46
#define MP_INC_46 47
#define MP_INC_47 48
#define MP_INC_48 49
#define MP_INC_49 50
#define MP_INC_50 51
#define MP_INC_51 52
#define MP_INC_52 53
#define MP_INC_53 54
#define MP_INC_54 55
#define MP_INC_55 56
#define MP_INC_56 57
#define MP_INC_57 58
#define MP_INC_58 59
#define MP_INC_59 60
#define MP_INC_60 61
#define MP_INC_61 62
#define MP_INC_62 63
#define MP_INC_63 64
// NARG macro
// MP_NARG(...) expands to the number of arguments it was given, for 1 to 64 arguments.
// It works by appending the descending list from MP_NARG_R() to the caller's arguments and letting
// MP_NARG_N pick the token that lands in slot N: every extra caller argument shifts the list one
// position to the right, so the selected number equals the argument count.
// The selector has 64 positional slots so that a 64-element list (the largest size handled by the
// MP_FOR_EACH_<n> / MP_REVERSE_FOR_EACH_<n> families) is counted correctly.
// Note: an empty argument list is still counted as 1 (the single empty argument).
#define MP_NARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10,_11,_12,_13,_14,_15,_16, \
                  _17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32, \
                  _33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48, \
                  _49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63,_64, N, ...) N
#define MP_NARG_R() 64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48, \
                    47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32, \
                    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \
                    15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define MP_NARG_(...) MP_NARG_N(__VA_ARGS__)
#define MP_NARG(...) MP_NARG_(__VA_ARGS__, MP_NARG_R())
// FOR_EACH macro
// MP_FOR_EACH(func, arg, sep, e0, e1, ...) expands to
//   func(arg, 0, e0) sep() func(arg, 1, e1) sep() ...
// i.e. it applies func to every element in order, passing the caller-supplied 'arg' and the element's
// 0-based index, and places sep() between consecutive results (see the MP_SEP_* macros).
// MP_FOR_EACH_<n> handles a list of n elements: it emits the first element and forwards the rest to
// MP_FOR_EACH_<n-1> with the index advanced through MP_INC. MP_FOR_EACH_1 emits the final element.
#define MP_FOR_EACH_1(idx, func, arg, sep, ...) func(arg, idx, __VA_ARGS__)
#define MP_FOR_EACH_2(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_1(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_3(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_2(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_4(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_3(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_5(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_4(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_6(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_5(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_7(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_6(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_8(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_7(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_9(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_8(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_10(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_9(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_11(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_10(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_12(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_11(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_13(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_12(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_14(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_13(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_15(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_14(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_16(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_15(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_17(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_16(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_18(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_17(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_19(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_18(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_20(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_19(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_21(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_20(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_22(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_21(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_23(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_22(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_24(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_23(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_25(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_24(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_26(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_25(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_27(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_26(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_28(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_27(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_29(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_28(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_30(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_29(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_31(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_30(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_32(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_31(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_33(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_32(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_34(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_33(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_35(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_34(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_36(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_35(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_37(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_36(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_38(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_37(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_39(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_38(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_40(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_39(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_41(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_40(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_42(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_41(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_43(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_42(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_44(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_43(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_45(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_44(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_46(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_45(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_47(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_46(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_48(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_47(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_49(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_48(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_50(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_49(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_51(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_50(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_52(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_51(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_53(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_52(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_54(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_53(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_55(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_54(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_56(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_55(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_57(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_56(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_58(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_57(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_59(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_58(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_60(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_59(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_61(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_60(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_62(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_61(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_63(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_62(MP_INC(idx), func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH_64(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_63(MP_INC(idx), func, arg, sep, __VA_ARGS__)
// Dispatcher: MP_FOR_EACH counts the elements with MP_NARG and MP_FOR_EACH_ pastes that count onto
// "MP_FOR_EACH_" to select the matching step macro, starting the index at 0.
#define MP_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_FOR_EACH_, N)(0, func, arg, sep, __VA_ARGS__)
#define MP_FOR_EACH(func, arg, sep, ...) MP_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__)
// REVERSE_FOR_EACH macro
// MP_REVERSE_FOR_EACH_<n>(func, arg, sep, e0, ..., e<n-1>) emits the elements in reverse order:
//   func(arg, 0, e<n-1>) sep() ... sep() func(arg, n-2, e1) sep() func(arg, n-1, e0)
// Each step first expands the tail through MP_REVERSE_FOR_EACH_<n-1> and then appends its own first
// element, so the index passed to func is the element's position in the emitted (reversed) sequence:
// the last list element gets index 0 and the first list element gets index n-1.
// NOTE(review): the dispatcher that selects the step macro from the argument count is not visible in
// this part of the file - presumably it mirrors MP_FOR_EACH; confirm below.
#define MP_REVERSE_FOR_EACH_1(func, arg, sep, ...) func(arg, 0, __VA_ARGS__)
#define MP_REVERSE_FOR_EACH_2(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_1(func, arg, sep, __VA_ARGS__) sep() func(arg, 1, x)
#define MP_REVERSE_FOR_EACH_3(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_2(func, arg, sep, __VA_ARGS__) sep() func(arg, 2, x)
#define MP_REVERSE_FOR_EACH_4(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_3(func, arg, sep, __VA_ARGS__) sep() func(arg, 3, x)
#define MP_REVERSE_FOR_EACH_5(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_4(func, arg, sep, __VA_ARGS__) sep() func(arg, 4, x)
#define MP_REVERSE_FOR_EACH_6(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_5(func, arg, sep, __VA_ARGS__) sep() func(arg, 5, x)
#define MP_REVERSE_FOR_EACH_7(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_6(func, arg, sep, __VA_ARGS__) sep() func(arg, 6, x)
#define MP_REVERSE_FOR_EACH_8(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_7(func, arg, sep, __VA_ARGS__) sep() func(arg, 7, x)
#define MP_REVERSE_FOR_EACH_9(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_8(func, arg, sep, __VA_ARGS__) sep() func(arg, 8, x)
#define MP_REVERSE_FOR_EACH_10(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_9(func, arg, sep, __VA_ARGS__) sep() func(arg, 9, x)
#define MP_REVERSE_FOR_EACH_11(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_10(func, arg, sep, __VA_ARGS__) sep() func(arg, 10, x)
#define MP_REVERSE_FOR_EACH_12(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_11(func, arg, sep, __VA_ARGS__) sep() func(arg, 11, x)
#define MP_REVERSE_FOR_EACH_13(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_12(func, arg, sep, __VA_ARGS__) sep() func(arg, 12, x)
#define MP_REVERSE_FOR_EACH_14(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_13(func, arg, sep, __VA_ARGS__) sep() func(arg, 13, x)
#define MP_REVERSE_FOR_EACH_15(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_14(func, arg, sep, __VA_ARGS__) sep() func(arg, 14, x)
#define MP_REVERSE_FOR_EACH_16(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_15(func, arg, sep, __VA_ARGS__) sep() func(arg, 15, x)
#define MP_REVERSE_FOR_EACH_17(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_16(func, arg, sep, __VA_ARGS__) sep() func(arg, 16, x)
#define MP_REVERSE_FOR_EACH_18(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_17(func, arg, sep, __VA_ARGS__) sep() func(arg, 17, x)
#define MP_REVERSE_FOR_EACH_19(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_18(func, arg, sep, __VA_ARGS__) sep() func(arg, 18, x)
#define MP_REVERSE_FOR_EACH_20(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_19(func, arg, sep, __VA_ARGS__) sep() func(arg, 19, x)
#define MP_REVERSE_FOR_EACH_21(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_20(func, arg, sep, __VA_ARGS__) sep() func(arg, 20, x)
#define MP_REVERSE_FOR_EACH_22(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_21(func, arg, sep, __VA_ARGS__) sep() func(arg, 21, x)
#define MP_REVERSE_FOR_EACH_23(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_22(func, arg, sep, __VA_ARGS__) sep() func(arg, 22, x)
#define MP_REVERSE_FOR_EACH_24(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_23(func, arg, sep, __VA_ARGS__) sep() func(arg, 23, x)
#define MP_REVERSE_FOR_EACH_25(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_24(func, arg, sep, __VA_ARGS__) sep() func(arg, 24, x)
#define MP_REVERSE_FOR_EACH_26(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_25(func, arg, sep, __VA_ARGS__) sep() func(arg, 25, x)
#define MP_REVERSE_FOR_EACH_27(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_26(func, arg, sep, __VA_ARGS__) sep() func(arg, 26, x)
#define MP_REVERSE_FOR_EACH_28(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_27(func, arg, sep, __VA_ARGS__) sep() func(arg, 27, x)
#define MP_REVERSE_FOR_EACH_29(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_28(func, arg, sep, __VA_ARGS__) sep() func(arg, 28, x)
#define MP_REVERSE_FOR_EACH_30(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_29(func, arg, sep, __VA_ARGS__) sep() func(arg, 29, x)
#define MP_REVERSE_FOR_EACH_31(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_30(func, arg, sep, __VA_ARGS__) sep() func(arg, 30, x)
#define MP_REVERSE_FOR_EACH_32(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_31(func, arg, sep, __VA_ARGS__) sep() func(arg, 31, x)
#define MP_REVERSE_FOR_EACH_33(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_32(func, arg, sep, __VA_ARGS__) sep() func(arg, 32, x)
#define MP_REVERSE_FOR_EACH_34(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_33(func, arg, sep, __VA_ARGS__) sep() func(arg, 33, x)
#define MP_REVERSE_FOR_EACH_35(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_34(func, arg, sep, __VA_ARGS__) sep() func(arg, 34, x)
#define MP_REVERSE_FOR_EACH_36(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_35(func, arg, sep, __VA_ARGS__) sep() func(arg, 35, x)
#define MP_REVERSE_FOR_EACH_37(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_36(func, arg, sep, __VA_ARGS__) sep() func(arg, 36, x)
#define MP_REVERSE_FOR_EACH_38(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_37(func, arg, sep, __VA_ARGS__) sep() func(arg, 37, x)
#define MP_REVERSE_FOR_EACH_39(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_38(func, arg, sep, __VA_ARGS__) sep() func(arg, 38, x)
#define MP_REVERSE_FOR_EACH_40(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_39(func, arg, sep, __VA_ARGS__) sep() func(arg, 39, x)
#define MP_REVERSE_FOR_EACH_41(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_40(func, arg, sep, __VA_ARGS__) sep() func(arg, 40, x)
#define MP_REVERSE_FOR_EACH_42(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_41(func, arg, sep, __VA_ARGS__) sep() func(arg, 41, x)
#define MP_REVERSE_FOR_EACH_43(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_42(func, arg, sep, __VA_ARGS__) sep() func(arg, 42, x)
#define MP_REVERSE_FOR_EACH_44(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_43(func, arg, sep, __VA_ARGS__) sep() func(arg, 43, x)
#define MP_REVERSE_FOR_EACH_45(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_44(func, arg, sep, __VA_ARGS__) sep() func(arg, 44, x)
#define MP_REVERSE_FOR_EACH_46(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_45(func, arg, sep, __VA_ARGS__) sep() func(arg, 45, x)
#define MP_REVERSE_FOR_EACH_47(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_46(func, arg, sep, __VA_ARGS__) sep() func(arg, 46, x)
#define MP_REVERSE_FOR_EACH_48(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_47(func, arg, sep, __VA_ARGS__) sep() func(arg, 47, x)
#define MP_REVERSE_FOR_EACH_49(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_48(func, arg, sep, __VA_ARGS__) sep() func(arg, 48, x)
#define MP_REVERSE_FOR_EACH_50(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_49(func, arg, sep, __VA_ARGS__) sep() func(arg, 49, x)
#define MP_REVERSE_FOR_EACH_51(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_50(func, arg, sep, __VA_ARGS__) sep() func(arg, 50, x)
#define MP_REVERSE_FOR_EACH_52(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_51(func, arg, sep, __VA_ARGS__) sep() func(arg, 51, x)
#define MP_REVERSE_FOR_EACH_53(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_52(func, arg, sep, __VA_ARGS__) sep() func(arg, 52, x)
#define MP_REVERSE_FOR_EACH_54(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_53(func, arg, sep, __VA_ARGS__) sep() func(arg, 53, x)
#define MP_REVERSE_FOR_EACH_55(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_54(func, arg, sep, __VA_ARGS__) sep() func(arg, 54, x)
#define MP_REVERSE_FOR_EACH_56(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_55(func, arg, sep, __VA_ARGS__) sep() func(arg, 55, x)
#define MP_REVERSE_FOR_EACH_57(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_56(func, arg, sep, __VA_ARGS__) sep() func(arg, 56, x)
#define MP_REVERSE_FOR_EACH_58(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_57(func, arg, sep, __VA_ARGS__) sep() func(arg, 57, x)
#define MP_REVERSE_FOR_EACH_59(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_58(func, arg, sep, __VA_ARGS__) sep() func(arg, 58, x)
#define MP_REVERSE_FOR_EACH_60(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_59(func, arg, sep, __VA_ARGS__) sep() func(arg, 59, x)
#define MP_REVERSE_FOR_EACH_61(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_60(func, arg, sep, __VA_ARGS__) sep() func(arg, 60, x)
#define MP_REVERSE_FOR_EACH_62(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_61(func, arg, sep, __VA_ARGS__) sep() func(arg, 61, x)
#define MP_REVERSE_FOR_EACH_63(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_62(func, arg, sep, __VA_ARGS__) sep() func(arg, 62, x)
#define MP_REVERSE_FOR_EACH_64(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_63(func, arg, sep, __VA_ARGS__) sep() func(arg, 63, x)
// Dispatch helper: pastes the argument count N onto MP_REVERSE_FOR_EACH_ (via MP_CONCAT)
// to select the matching fixed-arity macro, then forwards all arguments to it.
#define MP_REVERSE_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_REVERSE_FOR_EACH_, N)(func, arg, sep, __VA_ARGS__)
// MP_REVERSE_FOR_EACH(func, arg, sep, ...)
// Applies func(arg, index, x) to every variadic argument x, joining the results with sep().
// Each fixed-arity step expands the remaining arguments first and appends its own first
// argument last with the highest index, so the first argument is emitted last.
// The argument count is obtained with MP_NARG; the largest arity defined above is 64.
// NOTE(review): MP_NARG and MP_CONCAT are defined outside this excerpt - presumably
// MP_CONCAT expands its operands before pasting; confirm.
#define MP_REVERSE_FOR_EACH(func, arg, sep, ...) MP_REVERSE_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__)
// Helper: keeps its first parameter and discards everything after it.
#define MP_FIRST_ARG_(N, ...) N
// MP_FIRST_ARG(...): expands to the first argument of the list.
// The extra trailing `ignore` token keeps the variadic part of MP_FIRST_ARG_ non-empty
// when only a single argument is supplied.
#define MP_FIRST_ARG(...) MP_FIRST_ARG_(__VA_ARGS__, ignore)
// MP_REPEAT macro
// MP_REPEAT(N, func, sep) expands to: func(0) sep func(1) sep ... sep func(N-1)
//  - N    : repetition count; it is token-pasted onto MP_REPEAT_, so it must end up as a
//           plain decimal number in the range 0..32 (the arities defined below).
//  - func : function-like macro invoked with the zero-based index.
//  - sep  : separator token(s) inserted verbatim between invocations. Unlike the
//           MP_REVERSE_FOR_EACH_* macros, sep is NOT invoked as sep().
// MP_REPEAT_0 expands to nothing and MP_REPEAT_1 emits no separator.
// NOTE(review): passing a macro name as N relies on MP_CONCAT (defined outside this
// excerpt) expanding its operands before pasting - confirm.
#define MP_REPEAT_0(func, sep)
#define MP_REPEAT_1(func, sep) func(0)
#define MP_REPEAT_2(func, sep) MP_REPEAT_1(func, sep) sep func(1)
#define MP_REPEAT_3(func, sep) MP_REPEAT_2(func, sep) sep func(2)
#define MP_REPEAT_4(func, sep) MP_REPEAT_3(func, sep) sep func(3)
#define MP_REPEAT_5(func, sep) MP_REPEAT_4(func, sep) sep func(4)
#define MP_REPEAT_6(func, sep) MP_REPEAT_5(func, sep) sep func(5)
#define MP_REPEAT_7(func, sep) MP_REPEAT_6(func, sep) sep func(6)
#define MP_REPEAT_8(func, sep) MP_REPEAT_7(func, sep) sep func(7)
#define MP_REPEAT_9(func, sep) MP_REPEAT_8(func, sep) sep func(8)
#define MP_REPEAT_10(func, sep) MP_REPEAT_9(func, sep) sep func(9)
#define MP_REPEAT_11(func, sep) MP_REPEAT_10(func, sep) sep func(10)
#define MP_REPEAT_12(func, sep) MP_REPEAT_11(func, sep) sep func(11)
#define MP_REPEAT_13(func, sep) MP_REPEAT_12(func, sep) sep func(12)
#define MP_REPEAT_14(func, sep) MP_REPEAT_13(func, sep) sep func(13)
#define MP_REPEAT_15(func, sep) MP_REPEAT_14(func, sep) sep func(14)
#define MP_REPEAT_16(func, sep) MP_REPEAT_15(func, sep) sep func(15)
#define MP_REPEAT_17(func, sep) MP_REPEAT_16(func, sep) sep func(16)
#define MP_REPEAT_18(func, sep) MP_REPEAT_17(func, sep) sep func(17)
#define MP_REPEAT_19(func, sep) MP_REPEAT_18(func, sep) sep func(18)
#define MP_REPEAT_20(func, sep) MP_REPEAT_19(func, sep) sep func(19)
#define MP_REPEAT_21(func, sep) MP_REPEAT_20(func, sep) sep func(20)
#define MP_REPEAT_22(func, sep) MP_REPEAT_21(func, sep) sep func(21)
#define MP_REPEAT_23(func, sep) MP_REPEAT_22(func, sep) sep func(22)
#define MP_REPEAT_24(func, sep) MP_REPEAT_23(func, sep) sep func(23)
#define MP_REPEAT_25(func, sep) MP_REPEAT_24(func, sep) sep func(24)
#define MP_REPEAT_26(func, sep) MP_REPEAT_25(func, sep) sep func(25)
#define MP_REPEAT_27(func, sep) MP_REPEAT_26(func, sep) sep func(26)
#define MP_REPEAT_28(func, sep) MP_REPEAT_27(func, sep) sep func(27)
#define MP_REPEAT_29(func, sep) MP_REPEAT_28(func, sep) sep func(28)
#define MP_REPEAT_30(func, sep) MP_REPEAT_29(func, sep) sep func(29)
#define MP_REPEAT_31(func, sep) MP_REPEAT_30(func, sep) sep func(30)
#define MP_REPEAT_32(func, sep) MP_REPEAT_31(func, sep) sep func(31)
#define MP_REPEAT(N, func, sep) MP_CONCAT(MP_REPEAT_, N)(func, sep)

View file

@ -93,6 +93,8 @@ extern fpga_result fpgaClose(fpga_handle handle) {
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->shutdown();
delete sim;
return FPGA_OK;

View file

@ -110,6 +110,9 @@ public:
for (auto& buffer : host_buffers_) {
aligned_free(buffer.second.data);
}
if (ram_) {
delete ram_;
}
#ifdef VCD_OUTPUT
if (tfp_) {
tfp_->close();
@ -119,9 +122,6 @@ public:
if (device_) {
delete device_;
}
if (ram_) {
delete ram_;
}
}
int init() {
@ -142,11 +142,15 @@ public:
tfp_->open("trace.vcd");
#endif
// allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE);
// reset the device
this->reset();
// Turn on assertion after reset
Verilated::assertOn(true);
// launch execution thread
future_ = std::async(std::launch::async, [&]{
while (!stop_) {
@ -158,6 +162,13 @@ public:
return 0;
}
// Requests the background execution task to stop and blocks until it has finished.
// Safe to call when no task was ever launched (the future is then not valid).
void shutdown() {
  // raise the stop flag polled by the execution loop
  stop_ = true;
  if (!future_.valid()) {
    return; // nothing was launched, nothing to wait for
  }
  future_.wait();
}
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE);
if (alloc == NULL)
@ -256,9 +267,6 @@ private:
device_->clk = 1;
this->eval();
}
// Turn on assertion after reset
Verilated::assertOn(true);
}
void tick() {
@ -279,13 +287,13 @@ private:
}
}
dram_sim_.tick();
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
dram_sim_.tick();
#ifndef NDEBUG
fflush(stdout);
#endif
@ -399,7 +407,6 @@ private:
void avs_bus_reset() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
pending_mem_reqs_[b].clear();
device_->avs_readdatavalid[b] = 0;
device_->avs_waitrequest[b] = 0;
}
@ -422,7 +429,7 @@ private:
// process memory requests
assert(!device_->avs_read[b] || !device_->avs_write[b]);
unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE;
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE;
if (device_->avs_write[b]) {
uint64_t byteen = device_->avs_byteenable[b];
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
@ -432,7 +439,7 @@ private:
}
}
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr);
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr);
for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) {
printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]);
}
@ -456,7 +463,7 @@ private:
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
for (auto& req : pending_mem_reqs_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE);
@ -537,6 +544,10 @@ int opae_sim::init() {
return impl_->init();
}
// Public entry point: forwards the shutdown request to the private implementation object.
void opae_sim::shutdown() {
impl_->shutdown();
}
// Public entry point: forwards the buffer request unchanged to the private implementation
// and returns its status code.
//  len      - requested buffer length
//  buf_addr - receives the buffer address (filled in by the implementation)
//  wsid     - receives the buffer identifier (filled in by the implementation)
//  flags    - passed through as-is
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
return impl_->prepare_buffer(len, buf_addr, wsid, flags);
}

View file

@ -25,6 +25,8 @@ public:
int init();
void shutdown();
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
void release_buffer(uint64_t wsid);

View file

@ -39,13 +39,6 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/processor.cpp
ifdef AXI_BUS
TOP = Vortex_axi
CXXFLAGS += -DAXI_BUS
else
TOP = Vortex
endif
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
@ -56,7 +49,7 @@ VL_FLAGS += -DXLEN_$(XLEN)
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
VL_FLAGS += --cc Vortex --top-module Vortex
CXXFLAGS += $(CONFIGS)

View file

@ -13,13 +13,7 @@
#include "processor.h"
#ifdef AXI_BUS
#include "VVortex_axi.h"
typedef VVortex_axi Device;
#else
#include "VVortex.h"
typedef VVortex Device;
#endif
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
@ -106,7 +100,7 @@ public:
Verilated::assertOn(false);
// create RTL module instance
device_ = new Device();
device_ = new VVortex();
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
@ -116,7 +110,7 @@ public:
#endif
ram_ = nullptr;
// reset the device
this->reset();
@ -154,9 +148,11 @@ public:
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
#endif
// reset device
this->reset();
// start execution
running_ = true;
device_->reset = 0;
// wait on device to go busy
while (!device_->busy) {
@ -168,9 +164,6 @@ public:
this->tick();
}
// reset device
this->reset();
this->cout_flush();
}
@ -178,14 +171,16 @@ public:
device_->dcr_wr_valid = 1;
device_->dcr_wr_addr = addr;
device_->dcr_wr_data = value;
while (device_->dcr_wr_valid) {
this->tick();
}
this->tick();
device_->dcr_wr_valid = 0;
}
private:
void reset() {
this->mem_bus_reset();
this->dcr_bus_reset();
running_ = false;
print_bufs_.clear();
@ -198,11 +193,6 @@ private:
}
mem_rd_rsp_active_ = false;
mem_wr_rsp_active_ = false;
this->mem_bus_reset();
this->dcr_bus_reset();
device_->reset = 1;
@ -212,23 +202,19 @@ private:
device_->clk = 1;
this->eval();
}
device_->reset = 0;
for (int i = 0; i < RESET_DELAY; ++i) {
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
}
}
void tick() {
device_->clk = 0;
this->eval();
this->mem_bus_eval(0);
this->dcr_bus_eval(0);
device_->clk = 1;
this->eval();
this->mem_bus_eval(1);
this->dcr_bus_eval(1);
dram_sim_.tick();
this->mem_bus_eval();
if (!dram_queue_.empty()) {
auto mem_req = dram_queue_.front();
@ -244,6 +230,13 @@ private:
}
}
dram_sim_.tick();
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
@ -261,207 +254,39 @@ private:
++timestamp;
}
#ifdef AXI_BUS
// Puts the simulator-driven side of AXI memory port 0 into its idle state:
// no response is being presented and no request channel is accepting.
void mem_bus_reset() {
  // response channels: nothing valid
  device_->m_axi_bvalid[0] = 0;
  device_->m_axi_rvalid[0] = 0;
  // request channels: not ready
  device_->m_axi_arready[0] = 0;
  device_->m_axi_awready[0] = 0;
  device_->m_axi_wready[0] = 0;
}
// Evaluates AXI memory port 0 for one clock phase.
//  clk == false : only samples the device's rready/bready into mem_rd_rsp_ready_ /
//                 mem_wr_rsp_ready_ and returns.
//  clk == true  : (1) retires a read/write response that was accepted, (2) presents the next
//                 response if the request at the head of pending_mem_reqs_ is ready,
//                 (3) accepts at most one new request (a write takes priority over a read),
//                 (4) drives the three *ready request signals from running_.
// When no RAM is attached the request channels are held not-ready and nothing else happens.
void mem_bus_eval(bool clk) {
if (!clk) {
mem_rd_rsp_ready_ = device_->m_axi_rready[0];
mem_wr_rsp_ready_ = device_->m_axi_bready[0];
return;
}
if (ram_ == nullptr) {
device_->m_axi_wready[0] = 0;
device_->m_axi_awready[0] = 0;
device_->m_axi_arready[0] = 0;
return;
}
// process memory read responses
// a presented read response is retired once rvalid was set and the device's rready
// (sampled during the clk == false call) was high
if (mem_rd_rsp_active_
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
// only the request at the head of the pending list is considered
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready
&& !(*pending_mem_reqs_.begin())->write) {
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[i]);
}
printf("\n");
*/
device_->m_axi_rvalid[0] = 1;
device_->m_axi_rid[0] = mem_rsp->tag;
device_->m_axi_rresp[0] = 0;
device_->m_axi_rlast[0] = 1;
memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
delete mem_rsp;
} else {
device_->m_axi_rvalid[0] = 0;
}
}
// process memory write responses
// same retire/present scheme as for reads, using bvalid/bready
if (mem_wr_rsp_active_
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
mem_wr_rsp_active_ = false;
}
if (!mem_wr_rsp_active_) {
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready
&& (*pending_mem_reqs_.begin())->write) {
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr);
*/
device_->m_axi_bvalid[0] = 1;
device_->m_axi_bid[0] = mem_rsp->tag;
device_->m_axi_bresp[0] = 0;
pending_mem_reqs_.erase(mem_rsp_it);
mem_wr_rsp_active_ = true;
delete mem_rsp;
} else {
device_->m_axi_bvalid[0] = 0;
}
}
// select the memory bank
// NOTE(review): req_addr is computed here but not used anywhere below in this function
uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0];
// process memory requests
// requests are only taken while running_ is set; a pending write wins over a read
if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) {
if (device_->m_axi_wvalid[0]) {
auto byteen = device_->m_axi_wstrb[0];
auto base_addr = device_->m_axi_awaddr[0];
auto data = (uint8_t*)device_->m_axi_wdata[0].data();
if (base_addr >= uint64_t(IO_COUT_ADDR)
&& base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
// each enabled byte lane i appends its character to print_bufs_[i]; a buffer is
// printed and cleared when a newline arrives
// NOTE(review): this branch does not enqueue a mem_req, so no write response is
// generated for console writes here - confirm this is intended
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
}
printf("\n");
*/
// RAM is updated immediately, byte lane by byte lane according to the strobe
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
// the request record is kept to produce the write response once it is marked ready
auto mem_req = new mem_req_t();
mem_req->tag = device_->m_axi_awid[0];
mem_req->addr = device_->m_axi_awaddr[0];
mem_req->write = true;
mem_req->ready = false;
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
dram_queue_.push(mem_req);
}
} else {
// process reads
// the data is fetched from RAM right away and held in the request record until the
// request is marked ready
auto mem_req = new mem_req_t();
mem_req->tag = device_->m_axi_arid[0];
mem_req->addr = device_->m_axi_araddr[0];
ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
dram_queue_.push(mem_req);
}
}
// request channels accept only while the device is running
device_->m_axi_wready[0] = running_;
device_->m_axi_awready[0] = running_;
device_->m_axi_arready[0] = running_;
}
#else
// Idles the simulator-driven side of the native memory bus:
// no response is presented and no request is accepted.
void mem_bus_reset() {
  device_->mem_rsp_valid = 0; // response channel: nothing valid
  device_->mem_req_ready = 0; // request channel: not ready
}
void mem_bus_eval(bool clk) {
if (!clk) {
mem_rd_rsp_ready_ = device_->mem_rsp_ready;
return;
}
if (ram_ == nullptr) {
device_->mem_req_ready = 0;
return;
}
void mem_bus_eval() {
// process memory read responses
if (mem_rd_rsp_active_
&& device_->mem_rsp_valid && mem_rd_rsp_ready_) {
if (mem_rd_rsp_active_ && device_->mem_rsp_ready) {
device_->mem_rsp_valid = 0;
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready) {
device_->mem_rsp_valid = 1;
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[i]);
printf("%02x", mem_rsp->data[i]);
}
printf("\n");
*/
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_valid = 1;
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag = mem_rsp->tag;
pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
delete mem_rsp;
} else {
device_->mem_rsp_valid = 0;
}
}
// process memory requests
if (device_->mem_req_valid && running_) {
if (device_->mem_req_valid && device_->mem_req_ready) {
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
if (device_->mem_req_rw) {
auto byteen = device_->mem_req_byteen;
@ -516,7 +341,7 @@ private:
mem_req->addr = byte_addr;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_.emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
@ -529,21 +354,10 @@ private:
device_->mem_req_ready = running_;
}
#endif
// Idles the DCR write port by clearing its valid signal.
void dcr_bus_reset() {
device_->dcr_wr_valid = 0;
}
// Evaluates the DCR write port for one clock phase.
// Nothing happens while clk is false; when clk is true a pending dcr_wr_valid is cleared.
void dcr_bus_eval(bool clk) {
  const bool write_pending = clk && device_->dcr_wr_valid;
  if (write_pending) {
    device_->dcr_wr_valid = 0;
  }
}
void wait(uint32_t cycles) {
for (int i = 0; i < cycles; ++i) {
this->tick();
@ -553,8 +367,8 @@ private:
private:
typedef struct {
Device* device;
std::array<uint8_t, MEM_BLOCK_SIZE> block;
VVortex* device;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint64_t addr;
uint64_t tag;
bool write;
@ -569,7 +383,7 @@ private:
DramSim dram_sim_;
Device* device_;
VVortex* device_;
#ifdef VCD_OUTPUT
VerilatedVcdC *tfp_;
@ -578,10 +392,6 @@ private:
RAM* ram_;
bool mem_rd_rsp_active_;
bool mem_rd_rsp_ready_;
bool mem_wr_rsp_active_;
bool mem_wr_rsp_ready_;
bool running_;
};

View file

@ -32,11 +32,22 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
# AFU parameters
ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_NUM_BANKS=1
endif
ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32
endif
ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512
endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp
SRCS += $(SRC_DIR)/xrt.cpp $(SRC_DIR)/xrt_sim.cpp
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv

View file

@ -19,7 +19,7 @@
#include <cstring>
#include <unistd.h>
#include <assert.h>
#include "fpga.h"
#include "xrt.h"
#include "xrt_sim.h"
#include <VX_config.h>
#include <util.h>
@ -30,6 +30,13 @@ using namespace vortex;
extern "C" {
#endif
// Record behind an xrtBufferHandle: describes one device-memory allocation made
// through xrtBOAlloc and the simulator instance that owns it.
typedef struct {
size_t size; // size passed to xrtBOAlloc
xrt_sim* sim; // simulator the allocation was made on (not owned by this record)
uint32_t bank; // memory group passed to xrtBOAlloc, used as the bank id
uint64_t addr; // address returned by the simulator's allocator for that bank
} buffer_t;
extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) {
if (index != 0)
return nullptr;
@ -45,6 +52,8 @@ extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) {
extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, int* ret_size) {
static const char* deviceName = "vortex_xrtsim";
if (name) {
  // Copy exactly the device name plus its terminator. Copying `size` bytes, as before,
  // read past the end of the string literal whenever the caller's buffer was larger
  // than the name.
  const size_t required = strlen(deviceName) + 1;
  // reject negative sizes explicitly instead of relying on signed/unsigned conversion
  if (size < 0 || static_cast<size_t>(size) < required)
    return -1;
  memcpy(name, deviceName, required);
}
if (ret_size) {
@ -54,7 +63,10 @@ extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, i
}
// Closes a device handle: stops the simulator it refers to and destroys it.
// Returns 0 on success, -1 when the handle is null.
extern int xrtDeviceClose(xrtDeviceHandle dhdl) {
  auto simulator = reinterpret_cast<xrt_sim*>(dhdl);
  if (!simulator) {
    return -1;
  }
  // stop the simulator before releasing it
  simulator->shutdown();
  delete simulator;
  return 0;
}
@ -64,19 +76,38 @@ extern int xrtKernelClose(xrtKernelHandle /*kernelHandle*/) {
}
// Allocates `size` bytes of simulated device memory in memory group `grp` and returns a
// handle describing the allocation.
//  dhdl  - device handle (the simulator instance)
//  size  - number of bytes to allocate
//  flags - accepted for API compatibility; not used by the simulator
//  grp   - memory group, used as the bank id
// Returns nullptr when the device handle is null or the simulator's allocator fails.
// The returned handle must be released with xrtBOFree.
extern xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags flags, xrtMemoryGroup grp) {
  // the stale unconditional `return 0;` stub that made the code below unreachable is gone
  if (dhdl == nullptr)
    return nullptr;
  auto sim = reinterpret_cast<xrt_sim*>(dhdl);
  uint64_t addr = 0;
  int err = sim->mem_alloc(size, grp, &addr);
  if (err != 0)
    return nullptr;
  auto buffer = new buffer_t();
  buffer->size = size;
  buffer->bank = grp;
  buffer->sim = sim;
  buffer->addr = addr;
  return buffer;
}
// Releases a buffer obtained from xrtBOAlloc: returns its device memory to the simulator's
// allocator and destroys the handle record.
// Returns -1 for a null handle, otherwise the status reported by the simulator.
// The handle is invalid after this call regardless of the returned status.
extern int xrtBOFree(xrtBufferHandle bhdl) {
  if (bhdl == nullptr)
    return -1;
  auto buffer = reinterpret_cast<buffer_t*>(bhdl);
  int err = buffer->sim->mem_free(buffer->bank, buffer->addr);
  // the record was heap-allocated in xrtBOAlloc; free it here so it does not leak
  delete buffer;
  return err;
}
extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek) {
return 0;
extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset) {
if (bhdl == nullptr)
return -1;
auto buffer = reinterpret_cast<buffer_t*>(bhdl);
return buffer->sim->mem_write(buffer->bank, buffer->addr + offset, size, src);
}
extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip) {
return 0;
extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset) {
if (bhdl == nullptr)
return -1;
auto buffer = reinterpret_cast<buffer_t*>(bhdl);
return buffer->sim->mem_read(buffer->bank, buffer->addr + offset, size, dst);
}
extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) {
@ -84,11 +115,17 @@ extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t s
}
// Writes the 32-bit value `data` to the kernel control register at byte offset `offset`.
// The kernel handle is interpreted as the simulator instance.
// Returns -1 for a null handle, otherwise the status reported by the simulator.
extern int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data) {
  // the stale unconditional `return 0;` stub that made the code below unreachable is gone
  if (kernelHandle == nullptr)
    return -1;
  auto sim = reinterpret_cast<xrt_sim*>(kernelHandle);
  return sim->register_write(offset, data);
}
extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap) {
return 0;
extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data) {
if (kernelHandle == nullptr)
return -1;
auto sim = reinterpret_cast<xrt_sim*>(kernelHandle);
return sim->register_read(offset, data);
}
extern int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len) {

View file

@ -94,15 +94,15 @@ xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags fla
int xrtBOFree(xrtBufferHandle bhdl);
int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek);
int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset);
int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip);
int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset);
int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset);
int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data);
int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap);
int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data);
int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len);

View file

@ -32,6 +32,12 @@
#include <queue>
#include <unordered_map>
#include <util.h>
#include <mem_alloc.h>
#include <mp_macros.h>
#include <iostream>
#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8)
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
@ -53,6 +59,8 @@
#define RAM_PAGE_SIZE 4096
#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH)
#define CPU_GPU_LATENCY 200
using namespace vortex;
@ -80,6 +88,35 @@ void sim_trace_enable(bool enable) {
///////////////////////////////////////////////////////////////////////////////
// MP_M_AXI_MEM_EACH(i): fills entry i of the m_axi_mem_ table with the addresses of the
// device_ members named m_axi_mem_<i>_<signal>, covering the five AXI channels
// (aw, w, ar, r, b). The index is token-pasted into the member names, so i must be a
// literal number. The expansion ends with a semicolon.
#define MP_M_AXI_MEM_EACH(i) \
m_axi_mem_[i].awvalid = &device_->m_axi_mem_##i##_awvalid; \
m_axi_mem_[i].awready = &device_->m_axi_mem_##i##_awready; \
m_axi_mem_[i].awaddr = &device_->m_axi_mem_##i##_awaddr; \
m_axi_mem_[i].awid = &device_->m_axi_mem_##i##_awid; \
m_axi_mem_[i].awlen = &device_->m_axi_mem_##i##_awlen; \
m_axi_mem_[i].wvalid = &device_->m_axi_mem_##i##_wvalid; \
m_axi_mem_[i].wready = &device_->m_axi_mem_##i##_wready; \
m_axi_mem_[i].wdata = &device_->m_axi_mem_##i##_wdata; \
m_axi_mem_[i].wstrb = &device_->m_axi_mem_##i##_wstrb; \
m_axi_mem_[i].wlast = &device_->m_axi_mem_##i##_wlast; \
m_axi_mem_[i].arvalid = &device_->m_axi_mem_##i##_arvalid; \
m_axi_mem_[i].arready = &device_->m_axi_mem_##i##_arready; \
m_axi_mem_[i].araddr = &device_->m_axi_mem_##i##_araddr; \
m_axi_mem_[i].arid = &device_->m_axi_mem_##i##_arid; \
m_axi_mem_[i].arlen = &device_->m_axi_mem_##i##_arlen; \
m_axi_mem_[i].rvalid = &device_->m_axi_mem_##i##_rvalid; \
m_axi_mem_[i].rready = &device_->m_axi_mem_##i##_rready; \
m_axi_mem_[i].rdata = &device_->m_axi_mem_##i##_rdata; \
m_axi_mem_[i].rlast = &device_->m_axi_mem_##i##_rlast; \
m_axi_mem_[i].rid = &device_->m_axi_mem_##i##_rid; \
m_axi_mem_[i].rresp = &device_->m_axi_mem_##i##_rresp; \
m_axi_mem_[i].bvalid = &device_->m_axi_mem_##i##_bvalid; \
m_axi_mem_[i].bready = &device_->m_axi_mem_##i##_bready; \
m_axi_mem_[i].bresp = &device_->m_axi_mem_##i##_bresp; \
m_axi_mem_[i].bid = &device_->m_axi_mem_##i##_bid;
// MP_M_AXI_MEM(n): repeats MP_M_AXI_MEM_EACH through MP_REPEAT with `;` as the separator,
// i.e. binds the table entries for banks 0 .. n-1.
// NOTE(review): n has to resolve to a plain number that MP_REPEAT supports - confirm the
// upper limit against mp_macros.h when raising the bank count.
#define MP_M_AXI_MEM(n) MP_REPEAT(n, MP_M_AXI_MEM_EACH, ;)
class xrt_sim::Impl {
public:
Impl()
@ -97,6 +134,12 @@ public:
if (future_.valid()) {
future_.wait();
}
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
delete mem_alloc_[i];
}
if (ram_) {
delete ram_;
}
#ifdef VCD_OUTPUT
if (tfp_) {
tfp_->close();
@ -106,9 +149,6 @@ public:
if (device_) {
delete device_;
}
if (ram_) {
delete ram_;
}
}
int init() {
@ -129,22 +169,136 @@ public:
tfp_->open("trace.vcd");
#endif
// allocate RAM
ram_ = new RAM(0, RAM_PAGE_SIZE);
// initialize AXI memory interfaces
MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS);
// initialize memory allocator
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64);
}
// reset the device
this->reset();
// Turn on assertion after reset
Verilated::assertOn(true);
// launch execution thread
future_ = std::async(std::launch::async, [&]{
while (!stop_) {
std::lock_guard<std::mutex> guard(mutex_);
this->tick();
}
while (!stop_) {
std::lock_guard<std::mutex> guard(mutex_);
this->tick();
}
});
return 0;
}
// Requests the background execution task to stop and blocks until it has finished.
// Safe to call when no task was ever launched (the future is then not valid).
void shutdown() {
  // raise the stop flag polled by the execution loop
  stop_ = true;
  if (!future_.valid()) {
    return; // nothing was launched, nothing to wait for
  }
  future_.wait();
}
// Reserves `size` bytes in memory bank `bank_id` and stores the resulting address in *addr.
// Returns -1 for an out-of-range bank, otherwise the status of that bank's allocator.
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
  const bool bank_ok = (bank_id < M_AXI_MEM_NUM_BANKS);
  if (!bank_ok) {
    return -1;
  }
  auto allocator = mem_alloc_[bank_id];
  return allocator->allocate(size, addr);
}
// Returns the allocation at `addr` to the allocator of memory bank `bank_id`.
// Returns -1 for an out-of-range bank, otherwise the status of that bank's allocator.
int mem_free(uint32_t bank_id, uint64_t addr) {
  const bool bank_ok = (bank_id < M_AXI_MEM_NUM_BANKS);
  if (!bank_ok) {
    return -1;
  }
  auto allocator = mem_alloc_[bank_id];
  return allocator->release(addr);
}
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
if (bank_id >= M_AXI_MEM_NUM_BANKS)
return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
ram_->write(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((const uint8_t*)data)[i]);
}
printf(")\n");*/
return 0;
}
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
if (bank_id >= M_AXI_MEM_NUM_BANKS)
return -1;
uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr;
ram_->read(data, base_addr, size);
/*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size);
for (int i = size-1; i >= 0; --i) {
printf("%02x", ((uint8_t*)data)[i]);
}
printf(")\n");*/
return 0;
}
// Performs one write on the s_axi_ctrl control interface: address phase, data phase,
// then response phase, ticking the device until each phase completes.
//  offset - register address driven on awaddr
//  value  - 32-bit data driven on wdata (all four byte strobes set)
// mutex_ is held for the whole transaction, so the background execution loop (which takes
// the same mutex around each tick) cannot tick in between.
// Always returns 0. There is no timeout: each loop runs until the device responds.
int register_write(uint32_t offset, uint32_t value) {
std::lock_guard<std::mutex> guard(mutex_);
// write address
device_->s_axi_ctrl_awvalid = 1;
device_->s_axi_ctrl_awaddr = offset;
// ready is sampled once before ticking; the loop ticks at least once and ends when either
// that pre-tick sample or the current post-tick value of ready is set
auto s_axi_ctrl_awready = device_->s_axi_ctrl_awready;
do {
this->tick();
} while (!(s_axi_ctrl_awready || device_->s_axi_ctrl_awready));
device_->s_axi_ctrl_awvalid = 0;
// write data
device_->s_axi_ctrl_wvalid = 1;
device_->s_axi_ctrl_wdata = value;
device_->s_axi_ctrl_wstrb = 0xf;
// same wait pattern as for the address phase
auto s_axi_ctrl_wready = device_->s_axi_ctrl_wready;
do {
this->tick();
} while (!(s_axi_ctrl_wready || device_->s_axi_ctrl_wready));
device_->s_axi_ctrl_wvalid = 0;
// write response
// bready is raised until the device reports bvalid; the response code is not inspected
device_->s_axi_ctrl_bready = 1;
auto s_axi_ctrl_bvalid = device_->s_axi_ctrl_bvalid;
do {
this->tick();
} while (!(s_axi_ctrl_bvalid || device_->s_axi_ctrl_bvalid));
device_->s_axi_ctrl_bready = 0;
return 0;
}
// Performs one read on the s_axi_ctrl control interface: address phase, then data phase,
// ticking the device until each phase completes.
//  offset - register address driven on araddr
//  value  - receives the 32-bit content of rdata; must not be null (it is dereferenced
//           unconditionally)
// mutex_ is held for the whole transaction, so the background execution loop (which takes
// the same mutex around each tick) cannot tick in between.
// Always returns 0. There is no timeout: each loop runs until the device responds.
int register_read(uint32_t offset, uint32_t* value) {
std::lock_guard<std::mutex> guard(mutex_);
// read address
device_->s_axi_ctrl_arvalid = 1;
device_->s_axi_ctrl_araddr = offset;
// ready is sampled once before ticking; the loop ticks at least once and ends when either
// that pre-tick sample or the current post-tick value of ready is set
auto s_axi_ctrl_arready = device_->s_axi_ctrl_arready;
do {
this->tick();
} while (!(s_axi_ctrl_arready || device_->s_axi_ctrl_arready));
device_->s_axi_ctrl_arvalid = 0;
// read data
// rready is raised until the device reports rvalid; rdata is captured after the last tick
device_->s_axi_ctrl_rready = 1;
auto s_axi_ctrl_rvalid = device_->s_axi_ctrl_rvalid;
do {
this->tick();
} while (!(s_axi_ctrl_rvalid || device_->s_axi_ctrl_rvalid));
*value = device_->s_axi_ctrl_rdata;
device_->s_axi_ctrl_rready = 0;
return 0;
}
private:
void reset() {
@ -155,9 +309,9 @@ private:
reqs.clear();
}
{
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
std::swap(dram_queues_[i], empty);
}
device_->ap_rst_n = 0;
@ -177,36 +331,34 @@ private:
device_->ap_clk = 1;
this->eval();
}
// Turn on assertion after reset
Verilated::assertOn(true);
}
void tick() {
this->axi_ctrl_bus_eval();
this->axi_mem_bus_eval();
if (!dram_queue_.empty()) {
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
if (!dram_queues_[i].empty()) {
auto mem_req = dram_queues_[i].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queues_[i].pop();
}
}, mem_req)) {
dram_queue_.pop();
}
}
dram_sim_.tick();
device_->ap_clk = 0;
this->eval();
device_->ap_clk = 1;
this->eval();
dram_sim_.tick();
#ifndef NDEBUG
fflush(stdout);
#endif
@ -223,65 +375,208 @@ private:
}
// Idles every simulator-driven signal of the s_axi_ctrl control interface:
// all valid/ready outputs are lowered and the address/data payloads are zeroed.
void axi_ctrl_bus_reset() {
  // write address channel
  device_->s_axi_ctrl_awaddr = 0;
  device_->s_axi_ctrl_awvalid = 0;

  // write data channel
  device_->s_axi_ctrl_wdata = 0;
  device_->s_axi_ctrl_wstrb = 0;
  device_->s_axi_ctrl_wvalid = 0;

  // write response channel
  device_->s_axi_ctrl_bready = 0;

  // read address channel
  device_->s_axi_ctrl_araddr = 0;
  device_->s_axi_ctrl_arvalid = 0;

  // read data channel
  device_->s_axi_ctrl_rready = 0;
}
// Per-tick hook for the control interface. Intentionally empty: in the code shown, the
// s_axi_ctrl signals are driven directly by register_write / register_read.
void axi_ctrl_bus_eval() {
//--
}
// Resets the simulator-driven side of every AXI memory bank and the per-bank bookkeeping.
// After the call each bank accepts read/write addresses, accepts no write data yet,
// presents no response, and has no transaction in flight.
// All accesses go through the m_axi_mem_ pointer table. The previous hard-coded writes to
// the m_axi_mem_0_* ports were removed: inside the loop they forced bank 0's arready back
// to 0 right after it had been raised.
void axi_mem_bus_reset() {
  for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
    // address read request: ready to accept
    *m_axi_mem_[i].arready = 1;

    // address write request: ready to accept
    *m_axi_mem_[i].awready = 1;

    // data write request: not accepted until an address has been received
    *m_axi_mem_[i].wready = 0;

    // data read response: nothing to present
    *m_axi_mem_[i].rvalid = 0;

    // data write response: nothing to present
    *m_axi_mem_[i].bvalid = 0;

    // states: no transaction in flight
    m_axi_states_[i].write_req_pending = false;
    m_axi_states_[i].write_rsp_pending = false;
    m_axi_states_[i].read_rsp_pending = false;
  }
}
// Runs one evaluation step of the AXI memory bus model for every bank.
//
// Per bank, in this order:
//   1. retire a read response the device has accepted (rready), then present
//      the next one if the request at the head of the pending list is a
//      completed read;
//   2. same for write responses (bready / completed write at the head);
//   3. accept a new read request (arvalid && arready);
//   4. latch a new write address (awvalid && awready) if none is latched;
//   5. accept one write data beat for the latched address.
//
// Requests are heap-allocated here, linked into pending_mem_reqs_[bank] and
// dram_queues_[bank], and deleted when their response is presented.
// NOTE(review): `ready` is only ever cleared in this function; presumably the
// DRAM model sets it when the request completes -- confirm.
//
// Fix: the whole body sits inside the per-bank loop. A stray closing brace
// used to end the loop right after the first `if`, leaving every later use of
// `i` out of scope and the function's braces unbalanced.
void axi_mem_bus_eval() {
  for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) {
    // handle read responses
    if (m_axi_states_[i].read_rsp_pending
     && (*m_axi_mem_[i].rready)) {
      // the device took the beat: drop rvalid
      *m_axi_mem_[i].rvalid = 0;
      m_axi_states_[i].read_rsp_pending = false;
    }
    if (!m_axi_states_[i].read_rsp_pending) {
      // only the head of the list is inspected, so responses leave in
      // request order for this bank
      if (!pending_mem_reqs_[i].empty()
       && (*pending_mem_reqs_[i].begin())->ready
       && !(*pending_mem_reqs_[i].begin())->write) {
        auto mem_rsp_it = pending_mem_reqs_[i].begin();
        auto mem_rsp = *mem_rsp_it;
        *m_axi_mem_[i].rvalid = 1;
        *m_axi_mem_[i].rid = mem_rsp->tag;
        *m_axi_mem_[i].rresp = 0;
        *m_axi_mem_[i].rlast = 1;
        memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE);
        pending_mem_reqs_[i].erase(mem_rsp_it);
        m_axi_states_[i].read_rsp_pending = true;
        delete mem_rsp;
      }
    }

    // handle write responses
    if (m_axi_states_[i].write_rsp_pending) {
      if (*m_axi_mem_[i].bready) {
        // the device took the response: drop bvalid
        *m_axi_mem_[i].bvalid = 0;
        m_axi_states_[i].write_rsp_pending = false;
      }
    }
    if (!m_axi_states_[i].write_rsp_pending) {
      if (!pending_mem_reqs_[i].empty()
       && (*pending_mem_reqs_[i].begin())->ready
       && (*pending_mem_reqs_[i].begin())->write) {
        auto mem_rsp_it = pending_mem_reqs_[i].begin();
        auto mem_rsp = *mem_rsp_it;
        *m_axi_mem_[i].bvalid = 1;
        *m_axi_mem_[i].bid = mem_rsp->tag;
        *m_axi_mem_[i].bresp = 0;
        pending_mem_reqs_[i].erase(mem_rsp_it);
        m_axi_states_[i].write_rsp_pending = true;
        delete mem_rsp;
      }
    }

    // handle read requests
    if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
      auto mem_req = new mem_req_t();
      mem_req->tag = *m_axi_mem_[i].arid;
      mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
      // the data is fetched right away; the response is held back until the
      // request is marked ready
      ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE);
      mem_req->write = false;
      mem_req->ready = false;
      pending_mem_reqs_[i].emplace_back(mem_req);

      /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
      for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
      printf("%02x", mem_req->data[i]);
      }
      printf("\n");*/

      // send dram request
      dram_queues_[i].push(mem_req);
    }

    // handle address write requests
    if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) {
      m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr;
      m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid;
      m_axi_states_[i].write_req_pending = true;
    }

    // handle data write requests
    *m_axi_mem_[i].wready = false;
    if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) {
      auto byteen = *m_axi_mem_[i].wstrb;
      auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
      auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE;
      // commit only the bytes enabled by the write strobe; the byte index has
      // its own name so it no longer shadows the bank index
      for (int b = 0; b < M_AXI_MEM_DATA_SIZE; b++) {
        if ((byteen >> b) & 0x1) {
          (*ram_)[byte_addr + b] = data[b];
        }
      }

      auto mem_req = new mem_req_t();
      mem_req->tag = m_axi_states_[i].write_req_tag;
      mem_req->addr = byte_addr;
      mem_req->write = true;
      mem_req->ready = false;
      pending_mem_reqs_[i].emplace_back(mem_req);

      /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
      for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) {
      printf("%02x", data[i]);
      }
      printf("\n");*/

      // send dram request
      dram_queues_[i].push(mem_req);

      m_axi_states_[i].write_req_pending = false;

      // acquire write data
      *m_axi_mem_[i].wready = true;
    }
  }
}
// Per-bank bookkeeping for the AXI memory bus model (one entry per bank in
// m_axi_states_).
typedef struct {
// NOTE(review): data and addr are not referenced by the bus reset/eval
// handlers in this section; possibly left over from an earlier version.
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
// address and id latched from the write-address channel, held until the
// matching write-data beat is accepted
uint64_t write_req_addr;
uint32_t write_req_tag;
// true between latching a write address and accepting its data beat
bool write_req_pending;
// true while a read response is being presented and not yet accepted
bool read_rsp_pending;
// true while a write response is being presented and not yet accepted
bool write_rsp_pending;
} m_axi_state_t;
// One in-flight memory request, created when the bus accepts a read or write
// and destroyed when its response is presented to the device.
typedef struct {
// read payload: filled from RAM when the request is accepted and copied to
// the read-data signal when the response is presented
std::array<uint8_t, M_AXI_MEM_DATA_SIZE> data;
// AXI transaction id, echoed back on the response channel
uint32_t tag;
// byte address of the access
uint64_t addr;
// true for a write request, false for a read request
bool write;
// a response is only presented once this is true; it is false on creation
// NOTE(review): presumably set by the DRAM model on completion -- confirm
bool ready;
} mem_req_t;
// Pointers to the signals of one AXI memory bank, grouped by channel. The bus
// handlers read and drive the signals by dereferencing these pointers.
// NOTE(review): presumably each pointer is bound to the matching
// m_axi_mem_<bank>_* port of the Verilated model during setup -- confirm.
// NOTE(review): VlWide<16> fixes the data bus at 16 32-bit words; presumably
// this equals M_AXI_MEM_DATA_SIZE bytes -- confirm.
//
// The `Vvortex_afu_shim *device_;` declaration that used to precede this type
// was dropped: the same member is declared again with the other data members,
// and declaring it twice is a redeclaration error.
typedef struct {
  // write address channel
  CData* awvalid;
  CData* awready;
  QData* awaddr;
  IData* awid;
  CData* awlen;
  // write data channel
  CData* wvalid;
  CData* wready;
  VlWide<16>* wdata;
  QData* wstrb;
  CData* wlast;
  // read address channel
  CData* arvalid;
  CData* arready;
  QData* araddr;
  IData* arid;
  CData* arlen;
  // read data channel
  CData* rvalid;
  CData* rready;
  VlWide<16>* rdata;
  CData* rlast;
  IData* rid;
  CData* rresp;
  // write response channel
  CData* bvalid;
  CData* bready;
  CData* bresp;
  IData* bid;
} m_axi_mem_t;
// Verilated model whose AXI control/memory ports the bus handlers drive.
Vvortex_afu_shim* device_;
// Backing store accessed by the memory bus handlers (read() and byte indexing).
RAM* ram_;
// NOTE(review): presumably the DRAM timing model that consumes the per-bank
// request queues -- confirm.
DramSim dram_sim_;
@ -290,9 +585,15 @@ private:
std::mutex mutex_;

// Per-bank list of in-flight requests; responses are taken from the head.
// Only the M_AXI_MEM_NUM_BANKS-sized declaration is kept: the bus handlers
// index this array with a bank number below M_AXI_MEM_NUM_BANKS, and the
// second declaration sized by MEMORY_BANKS redeclared the same member.
std::list<mem_req_t*> pending_mem_reqs_[M_AXI_MEM_NUM_BANKS];

// NOTE(review): the bus handlers push to dram_queues_ (per bank) only; this
// single queue looks superseded -- confirm there are no users before removing.
std::queue<mem_req_t*> dram_queue_;

// Per-bank AXI signal pointers.
m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS];

// Per-bank allocators.
MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS];

// Per-bank handshake bookkeeping used by the memory bus handlers.
m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS];

// Per-bank queues of requests handed to the DRAM model.
std::queue<mem_req_t*> dram_queues_[M_AXI_MEM_NUM_BANKS];
#ifdef VCD_OUTPUT
VerilatedVcdC* tfp_;
@ -311,4 +612,32 @@ xrt_sim::~xrt_sim() {
// Initializes the simulator by delegating to the implementation object; the
// implementation's status code is returned unchanged.
int xrt_sim::init() {
  const int status = impl_->init();
  return status;
}
void xrt_sim::shutdown() {
impl_->shutdown();
}
// Forwards an allocation request for `size` bytes in bank `bank_id` to the
// implementation object; `addr` is passed through as the output location and
// the implementation's status code is returned unchanged.
int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
  const int status = impl_->mem_alloc(size, bank_id, addr);
  return status;
}
// Forwards a request to free `addr` in bank `bank_id` to the implementation
// object and returns its status code unchanged.
int xrt_sim::mem_free(uint32_t bank_id, uint64_t addr) {
  const int status = impl_->mem_free(bank_id, addr);
  return status;
}
// Forwards a write of `size` bytes from `data` to `addr` in bank `bank_id` to
// the implementation object and returns its status code unchanged.
int xrt_sim::mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
  const int status = impl_->mem_write(bank_id, addr, size, data);
  return status;
}
// Forwards a read of `size` bytes from `addr` in bank `bank_id` into `data` to
// the implementation object and returns its status code unchanged.
int xrt_sim::mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
  const int status = impl_->mem_read(bank_id, addr, size, data);
  return status;
}
// Forwards a register write of `value` at `offset` to the implementation
// object and returns its status code unchanged.
int xrt_sim::register_write(uint32_t offset, uint32_t value) {
  const int status = impl_->register_write(offset, value);
  return status;
}
// Forwards a register read at `offset` to the implementation object, which
// receives `value` as the output location; its status code is returned
// unchanged.
int xrt_sim::register_read(uint32_t offset, uint32_t* value) {
  const int status = impl_->register_read(offset, value);
  return status;
}

View file

@ -25,6 +25,20 @@ public:
// Public simulator interface. Each method is implemented as a direct forward
// to the private implementation object and returns that object's result.
// NOTE(review): the meaning of the int status codes is not visible here;
// presumably 0 means success -- confirm against the implementation.

// Initializes the simulator.
int init();
// Shuts the simulator down.
void shutdown();
// Allocates `size` bytes in bank `bank_id`; `addr` is the output location.
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr);
// Frees the allocation at `addr` in bank `bank_id`.
int mem_free(uint32_t bank_id, uint64_t addr);
// Writes `size` bytes from `value` to `addr` in bank `bank_id`.
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* value);
// Reads `size` bytes from `addr` in bank `bank_id` into `value`.
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* value);
// Writes `value` to the register at `offset`.
int register_write(uint32_t offset, uint32_t value);
// Reads the register at `offset` into `value`.
int register_read(uint32_t offset, uint32_t* value);
private:
class Impl;