XRT runtime and simulation support for Vortex AFU (incomplete)

This commit is contained in:
Blaise Tine 2024-05-11 17:43:49 -07:00
parent 98f080340a
commit 60107cf2b6
19 changed files with 973 additions and 176 deletions

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -77,7 +77,8 @@
/* verilator lint_off IMPLICIT */ \
/* verilator lint_off PINMISSING */ \
/* verilator lint_off IMPORTSTAR */ \
/* verilator lint_off UNSIGNED */
/* verilator lint_off UNSIGNED */ \
/* verilator lint_off SYMRSVDWORD */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
@ -88,7 +89,8 @@
/* verilator lint_on IMPLICIT */ \
/* verilator lint_off PINMISSING */ \
/* verilator lint_on IMPORTSTAR */ \
/* verilator lint_on UNSIGNED */
/* verilator lint_on UNSIGNED */ \
/* verilator lint_on SYMRSVDWORD */
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
localparam __``x = x; \

View file

@ -110,23 +110,25 @@ module VX_afu_ctrl #(
ADDR_DEV_0 = 8'h10,
ADDR_DEV_1 = 8'h14,
ADDR_DEV_CTRL = 8'h18,
//ADDR_DEV_CTRL = 8'h18,
ADDR_ISA_0 = 8'h1C,
ADDR_ISA_1 = 8'h20,
ADDR_ISA_CTRL = 8'h24,
//ADDR_ISA_CTRL = 8'h24,
ADDR_DCR_0 = 8'h28,
ADDR_DCR_1 = 8'h2C,
ADDR_DCR_CTRL = 8'h30,
//ADDR_DCR_CTRL = 8'h30,
`ifdef SCOPE
ADDR_SCP_0 = 8'h34,
ADDR_SCP_1 = 8'h38,
ADDR_SCP_CTRL = 8'h3C,
//ADDR_SCP_CTRL = 8'h3C,
`endif
ADDR_MEM_0 = 8'h40,
ADDR_MEM_1 = 8'h44,
ADDR_MEM_CTRL = 8'h48,
//ADDR_MEM_CTRL = 8'h48,
ADDR_BITS = 8;
@ -318,10 +320,10 @@ module VX_afu_ctrl #(
end
default: begin
for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin
if (waddr == (ADDR_MEM_0 + i * 12)) begin
if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin
mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask);
end
if (waddr == (ADDR_MEM_1 + i * 12)) begin
if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin
mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask);
end
end

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,7 +13,7 @@
`include "vortex_afu.vh"
module VX_afu_wrap #(
module VX_afu_wrap #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = 16,
@ -45,8 +45,8 @@ module VX_afu_wrap #(
output wire s_axi_ctrl_bvalid,
input wire s_axi_ctrl_bready,
output wire [1:0] s_axi_ctrl_bresp,
output wire interrupt
output wire interrupt
);
localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS;
@ -62,7 +62,7 @@ module VX_afu_wrap #(
wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS];
wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS];
wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS];
wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS];
wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_ID_WIDTH-1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS];
@ -82,17 +82,16 @@ module VX_afu_wrap #(
// convert memory interface to array
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
wire clk = ap_clk;
wire reset = ~ap_rst_n;
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
reg [15:0] vx_pending_writes;
reg vx_busy_wait;
reg vx_running;
wire vx_busy;
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS];
wire dcr_wr_valid;
wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr;
@ -109,7 +108,7 @@ module VX_afu_wrap #(
`ifdef SCOPE
wire scope_bus_in;
wire scope_bus_out;
wire scope_reset = reset;
wire scope_reset = reset;
`endif
always @(posedge ap_clk) begin
@ -120,15 +119,15 @@ module VX_afu_wrap #(
end else begin
case (state)
STATE_IDLE: begin
if (ap_start) begin
if (ap_start) begin
`ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: STATE RUN\n", $time));
`endif
`endif
state <= STATE_RUN;
vx_running <= 0;
end
end
STATE_RUN: begin
STATE_RUN: begin
if (vx_running) begin
if (vx_busy_wait) begin
// wait until processor goes busy
@ -151,7 +150,7 @@ module VX_afu_wrap #(
`ifdef DBG_TRACE_AFU
`TRACE(2, ("%d: AFU: Begin execution\n", $time));
`endif
vx_running <= 1;
vx_running <= 1;
vx_busy_wait <= 1;
end
end
@ -185,7 +184,7 @@ module VX_afu_wrap #(
always @(posedge ap_clk) begin
if (state == STATE_RUN) begin
vx_reset_ctr <= vx_reset_ctr + 1;
vx_reset_ctr <= vx_reset_ctr + 1;
end else begin
vx_reset_ctr <= '0;
end
@ -197,9 +196,9 @@ module VX_afu_wrap #(
.AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS)
) afu_ctrl (
.clk (ap_clk),
.reset (reset || ap_reset),
.reset (reset || ap_reset),
.clk_en (1'b1),
.s_axi_awvalid (s_axi_ctrl_awvalid),
.s_axi_awready (s_axi_ctrl_awready),
.s_axi_awaddr (s_axi_ctrl_awaddr),
@ -226,9 +225,9 @@ module VX_afu_wrap #(
.interrupt (interrupt),
`ifdef SCOPE
.scope_bus_in (scope_bus_out),
.scope_bus_in (scope_bus_out),
.scope_bus_out (scope_bus_in),
`endif
`endif
.mem_base (mem_base),
@ -257,7 +256,7 @@ module VX_afu_wrap #(
.clk (ap_clk),
.reset (reset || ap_reset || ~vx_running),
.m_axi_awvalid (m_axi_mem_awvalid_a),
.m_axi_awready (m_axi_mem_awready_a),
.m_axi_awaddr (m_axi_mem_awaddr_w),
@ -268,7 +267,7 @@ module VX_afu_wrap #(
`UNUSED_PIN (m_axi_awlock),
`UNUSED_PIN (m_axi_awcache),
`UNUSED_PIN (m_axi_awprot),
`UNUSED_PIN (m_axi_awqos),
`UNUSED_PIN (m_axi_awqos),
`UNUSED_PIN (m_axi_awregion),
.m_axi_wvalid (m_axi_mem_wvalid_a),
@ -280,7 +279,7 @@ module VX_afu_wrap #(
.m_axi_bvalid (m_axi_mem_bvalid_a),
.m_axi_bready (m_axi_mem_bready_a),
.m_axi_bid (m_axi_mem_bid_a),
.m_axi_bresp (m_axi_mem_bresp_a),
.m_axi_bresp (m_axi_mem_bresp_a),
.m_axi_arvalid (m_axi_mem_arvalid_a),
.m_axi_arready (m_axi_mem_arready_a),
@ -292,7 +291,7 @@ module VX_afu_wrap #(
`UNUSED_PIN (m_axi_arlock),
`UNUSED_PIN (m_axi_arcache),
`UNUSED_PIN (m_axi_arprot),
`UNUSED_PIN (m_axi_arqos),
`UNUSED_PIN (m_axi_arqos),
`UNUSED_PIN (m_axi_arregion),
.m_axi_rvalid (m_axi_mem_rvalid_a),
@ -370,13 +369,13 @@ module VX_afu_wrap #(
reg [`CLOG2(`RESET_DELAY+1)-1:0] assert_delay_ctr;
reg assert_enabled;
initial begin
$assertoff(0, vortex_axi);
end
$assertoff(0, vortex_axi);
end
always @(posedge ap_clk) begin
if (reset) begin
assert_delay_ctr <= '0;
assert_enabled <= 0;
end else begin
end else begin
if (~assert_enabled) begin
if (assert_delay_ctr == (`RESET_DELAY-1)) begin
assert_enabled <= 1;
@ -394,12 +393,12 @@ module VX_afu_wrap #(
always @(posedge ap_clk) begin
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
`TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]));
end
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
`TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%0h\n", $time, i, m_axi_mem_wdata_a[i]));
end
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
`TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]));
end
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
@ -408,5 +407,5 @@ module VX_afu_wrap #(
end
end
`endif
endmodule

View file

@ -1,7 +1,7 @@
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
all: stub rtlsim simx opae
all: stub rtlsim simx opae xrt
stub:
$(MAKE) -C stub

View file

@ -1,5 +1,9 @@
include ../common.mk
TARGET ?= xrtsim
DESTDIR ?= $(CURDIR)
SRC_DIR := $(VORTEX_HOME)/runtime/xrt
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
@ -7,29 +11,43 @@ CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/includ
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
LDFLAGS += -L$(XILINX_XRT)/lib -luuid -lxrt_coreutil
LDFLAGS += -L$(XILINX_XRT)/lib
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp $(SIM_DIR)/common/util.cpp
# set up target types
ifeq ($(TARGET), xrtsim)
XRTSIM = $(DESTDIR)/libxrtsim.so
CXXFLAGS += -DXRTSIM -I$(SIM_DIR)/xrtsim
LDFLAGS += -L$(DESTDIR) -lxrtsim
else
LDFLAGS += -luuid -lxrt_coreutil
endif
PROJECT := libvortex.so
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable scope logic analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
CXXFLAGS += -DSCOPE
SRCS += $(COMMON_DIR)/scope.cpp
endif
all: $(PROJECT)
all: $(DESTDIR)/$(PROJECT)
$(PROJECT): $(SRCS) $(SCOPE_JSON)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
$(DESTDIR)/libxrtsim.so:
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/xrtsim $(DESTDIR)/libxrtsim.so
$(DESTDIR)/$(PROJECT): $(SRCS) $(XRTSIM)
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
clean:
rm -rf $(PROJECT) obj_dir
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/xrtsim clean
rm -rf $(DESTDIR)/$(PROJECT)

View file

@ -19,6 +19,8 @@
#include <stdarg.h>
#include <util.h>
#include <limits>
#include <vector>
#include <string>
#include <unordered_map>
#ifdef SCOPE
@ -26,16 +28,23 @@
#endif
// XRT includes
#ifndef XRTSIM
#include "experimental/xrt_bo.h"
#include "experimental/xrt_ip.h"
#include "experimental/xrt_device.h"
#include "experimental/xrt_kernel.h"
#include "experimental/xrt_xclbin.h"
#include "experimental/xrt_error.h"
#else
#include <fpga.h>
#endif
using namespace vortex;
#ifndef XRTSIM
#define CPP_API
#endif
//#define BANK_INTERLEAVE
#define MMIO_CTL_ADDR 0x00
@ -60,9 +69,10 @@ struct platform_info_t {
};
static const platform_info_t g_platforms [] = {
{"xilinx_u50", 4, 0x1C, 0x0},
{"xilinx_u200", 4, 0x1C, 0x0},
{"xilinx_u280", 4, 0x1C, 0x0},
{"vortex_xrtsim", 4, 0x10, 0x0}, // 64 KB banks
{"xilinx_u50", 4, 0x1C, 0x0}, // 16 MB banks
{"xilinx_u200", 4, 0x1C, 0x0}, // 16 MB banks
{"xilinx_u280", 4, 0x1C, 0x0}, // 16 MB banks
{"xilinx_vck5000", 0, 0x21, 0xC000000000},
};
@ -148,7 +158,6 @@ public:
, xrtKernel_(kernel)
, platform_(platform)
, global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE)
, mpm_cache_(nullptr)
{}
#ifndef CPP_API
@ -515,7 +524,7 @@ public:
profiling_begin(profiling_id_);
// start execution
CHECK_ERR(device->write_register(MMIO_CTL_ADDR, CTL_AP_START), {
CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_START), {
return err;
});
@ -540,7 +549,7 @@ public:
for (;;) {
uint32_t status = 0;
CHECK_ERR(device->read_register(MMIO_CTL_ADDR, &status), {
CHECK_ERR(this->read_register(MMIO_CTL_ADDR, &status), {
return err;
});
bool is_done = (status & CTL_AP_DONE) == CTL_AP_DONE;
@ -554,7 +563,7 @@ public:
};
profiling_end(profiling_id_);
return 0;
}
@ -778,6 +787,8 @@ extern int vx_dev_open(vx_device_h* hdevice) {
return -1;
});
#ifndef XRTSIM
CHECK_ERR(xrtDeviceLoadXclbinFile(xrtDevice, xlbin_path_s), {
dump_xrt_error(xrtDevice, err);
xrtDeviceClose(xrtDevice);
@ -796,6 +807,12 @@ extern int vx_dev_open(vx_device_h* hdevice) {
return -1;
});
#else
xrtKernelHandle xrtKernel = nullptr;
#endif
int device_name_size;
xrtXclbinGetXSAName(xrtDevice, nullptr, 0, &device_name_size);
std::vector<char> device_name(device_name_size);
@ -1035,7 +1052,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, const void* host_ptr, uint64_t ds
DBGPRINT("COPY_TO_DEV: hbuffer=%p, host_addr=%p, dst_offset=%ld, size=%ld\n", hbuffer, host_ptr, dst_offset, size);
CHECK_ERR(device->upload(buffer->addr + dst_offset, host_ptr, asize), {
CHECK_ERR(device->upload(buffer->addr + dst_offset, host_ptr, size), {
return err;
});
@ -1054,7 +1071,7 @@ extern int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_of
DBGPRINT("COPY_FROM_DEV: hbuffer=%p, host_addr=%p, src_offset=%ld, size=%ld\n", hbuffer, host_ptr, src_offset, size);
CHECK_ERR(device->download(host_ptr, buffer->addr + src_offset, asize), {
CHECK_ERR(device->download(host_ptr, buffer->addr + src_offset, size), {
return err;
});

View file

@ -5,8 +5,10 @@ all:
$(MAKE) -C simx
$(MAKE) -C rtlsim
$(MAKE) -C opaesim
$(MAKE) -C xrtsim
clean:
$(MAKE) -C simx clean
$(MAKE) -C rtlsim clean
$(MAKE) -C opaesim clean
$(MAKE) -C opaesim clean
$(MAKE) -C xrtsim clean

View file

@ -4,7 +4,6 @@ include $(ROOT_DIR)/config.mk
HW_DIR := $(VORTEX_HOME)/hw
RTL_DIR := $(HW_DIR)/rtl
DPI_DIR := $(HW_DIR)/dpi
AFU_DIR := $(RTL_DIR)/afu/opae
SCRIPT_DIR := $(HW_DIR)/scripts
COMMON_DIR := $(VORTEX_HOME)/sim/common

View file

@ -3,6 +3,7 @@ include ../common.mk
DESTDIR ?= $(CURDIR)
SRC_DIR := $(VORTEX_HOME)/sim/opaesim
AFU_DIR := $(RTL_DIR)/afu/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
@ -85,7 +86,7 @@ VL_FLAGS += -j $(THREADS)
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O3 -DNDEBUG
endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -29,40 +29,40 @@ using namespace vortex;
extern "C" {
#endif
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
__unused (token, prop);
return FPGA_OK;
}
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
__unused (prop, objtype);
return FPGA_OK;
return FPGA_OK;
}
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
__unused (prop, guid);
return FPGA_OK;
return FPGA_OK;
}
extern fpga_result fpgaDestroyProperties(fpga_properties *prop) {
__unused (prop);
return FPGA_OK;
__unused (prop);
return FPGA_OK;
}
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
__unused (filters, num_filters, num_filters, tokens, max_tokens);
if (num_matches) {
*num_matches = 1;
}
return FPGA_OK;
return FPGA_OK;
}
extern fpga_result fpgaDestroyToken(fpga_token *token) {
extern fpga_result fpgaDestroyToken(fpga_token *token) {
__unused (token);
return FPGA_OK;
return FPGA_OK;
}
extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filters, uint64_t* lms) {
extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filters, uint64_t* lms) {
__unused (filters);
if (lms) {
#if (XLEN == 64)
@ -71,14 +71,19 @@ extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filte
*lms = 0x100000000; // 4 GB
#endif
}
return FPGA_OK;
return FPGA_OK;
}
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
__unused (token);
if (NULL == handle || flags != 0)
return FPGA_INVALID_PARAM;
auto sim = new opae_sim();
auto sim = new opae_sim();
int ret = sim->init();
if (ret != 0) {
delete sim;
return FPGA_NO_MEMORY;
}
*handle = reinterpret_cast<fpga_handle>(sim);
return FPGA_OK;
}
@ -89,24 +94,24 @@ extern fpga_result fpgaClose(fpga_handle handle) {
auto sim = reinterpret_cast<opae_sim*>(handle);
delete sim;
return FPGA_OK;
}
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
int ret = sim->prepare_buffer(len, buf_addr, wsid, flags);
if (ret != 0)
return FPGA_NO_MEMORY;
return FPGA_OK;
}
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
if (NULL == handle)
if (NULL == handle)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
@ -116,7 +121,7 @@ extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
}
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
if (NULL == handle || ioaddr == NULL)
if (NULL == handle || ioaddr == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
@ -126,7 +131,7 @@ extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t
}
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
if (NULL == handle || mmio_num != 0)
if (NULL == handle || mmio_num != 0)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
@ -136,7 +141,7 @@ extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64
}
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
if (NULL == handle || mmio_num != 0 || value == NULL)
if (NULL == handle || mmio_num != 0 || value == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -40,7 +40,7 @@
#include <unordered_map>
#include <util.h>
#ifndef MEMORY_BANKS
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
@ -82,7 +82,7 @@ using namespace vortex;
static uint64_t timestamp = 0;
double sc_time_stamp() {
double sc_time_stamp() {
return timestamp;
}
@ -91,7 +91,7 @@ static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
@ -106,9 +106,45 @@ void sim_trace_enable(bool enable) {
class opae_sim::Impl {
public:
Impl()
: stop_(false)
, host_buffer_ids_(0) {
// force random values for uninitialized signals
: device_(nullptr)
, ram_(nullptr)
, ramulator_(nullptr)
, stop_(false)
, host_buffer_ids_(0)
#ifdef VCD_OUTPUT
, trace_(nullptr)
#endif
{}
~Impl() {
stop_ = true;
if (future_.valid()) {
future_.wait();
}
for (auto& buffer : host_buffers_) {
aligned_free(buffer.second.data);
}
#ifdef VCD_OUTPUT
if (trace_) {
trace_->close();
delete trace_;
}
#endif
if (device_) {
delete device_;
}
if (ram_) {
delete ram_;
}
if (ramulator_) {
ramulator_->finish();
Stats::statlist.printall();
delete ramulator_;
}
}
int init() {
// force random values for uninitialized signals
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
@ -136,42 +172,21 @@ public:
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(1);
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
// reset the device
this->reset();
// launch execution thread
future_ = std::async(std::launch::async, [&]{
future_ = std::async(std::launch::async, [&]{
while (!stop_) {
std::lock_guard<std::mutex> guard(mutex_);
this->tick();
}
});
}
});
~Impl() {
stop_ = true;
if (future_.valid()) {
future_.wait();
}
for (auto& buffer : host_buffers_) {
aligned_free(buffer.second.data);
}
#ifdef VCD_OUTPUT
trace_->close();
delete trace_;
#endif
delete device_;
delete ram_;
if (dram_) {
dram_->finish();
Stats::statlist.printall();
delete dram_;
}
return 0;
}
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
@ -185,7 +200,7 @@ public:
host_buffer_t buffer;
buffer.data = (uint64_t*)alloc;
buffer.size = len;
buffer.ioaddr = uintptr_t(alloc);
buffer.ioaddr = uintptr_t(alloc);
auto buffer_id = host_buffer_ids_++;
host_buffers_.emplace(buffer_id, buffer);
*buf_addr = alloc;
@ -209,7 +224,7 @@ public:
std::lock_guard<std::mutex> guard(mutex_);
// simulate CPU-GPU latency
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
this->tick();
// simulate mmio request
@ -219,7 +234,7 @@ public:
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->tick();
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
assert(device_->af2cp_sTxPort_c2_mmioRdValid);
assert(device_->af2cp_sTxPort_c2_mmioRdValid);
*value = device_->af2cp_sTxPort_c2_data;
}
@ -227,11 +242,11 @@ public:
std::lock_guard<std::mutex> guard(mutex_);
// simulate CPU-GPU latency
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
this->tick();
// simulate mmio request
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
@ -242,19 +257,19 @@ public:
private:
void reset() {
void reset() {
cci_reads_.clear();
cci_writes_.clear();
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
device_->vcp2af_sRxPort_c0_rspValid = 0;
device_->vcp2af_sRxPort_c1_rspValid = 0;
device_->vcp2af_sRxPort_c0_rspValid = 0;
device_->vcp2af_sRxPort_c1_rspValid = 0;
device_->vcp2af_sRxPort_c0_TxAlmFull = 0;
device_->vcp2af_sRxPort_c1_TxAlmFull = 0;
for (int b = 0; b < MEMORY_BANKS; ++b) {
pending_mem_reqs_[b].clear();
device_->avs_readdatavalid[b] = 0;
device_->avs_readdatavalid[b] = 0;
device_->avs_waitrequest[b] = 0;
}
@ -265,9 +280,9 @@ private:
this->eval();
device_->clk = 1;
this->eval();
}
}
device_->reset = 0;
device_->reset = 0;
for (int i = 0; i < RESET_DELAY; ++i) {
device_->clk = 0;
@ -275,7 +290,7 @@ private:
device_->clk = 1;
this->eval();
}
// Turn on assertion after reset
Verilated::assertOn(true);
}
@ -286,22 +301,22 @@ private:
this->avs_bus();
if (!dram_queue_.empty()) {
if (dram_->send(dram_queue_.front()))
if (ramulator_->send(dram_queue_.front()))
dram_queue_.pop();
}
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
if (MEM_CYCLE_RATIO > 0) {
if (MEM_CYCLE_RATIO > 0) {
auto cycle = timestamp / 2;
if ((cycle % MEM_CYCLE_RATIO) == 0)
dram_->tick();
ramulator_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
dram_->tick();
ramulator_->tick();
}
#ifndef NDEBUG
@ -319,7 +334,7 @@ private:
++timestamp;
}
void sRxPort_bus() {
void sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid
|| device_->vcp2af_sRxPort_c0_mmioWrValid;
@ -344,8 +359,8 @@ private:
}
}
// send CCI write response
device_->vcp2af_sRxPort_c1_rspValid = 0;
// send CCI write response
device_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_it != cci_writes_.end()) {
device_->vcp2af_sRxPort_c1_rspValid = 1;
device_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
@ -353,14 +368,14 @@ private:
cci_writes_.erase(cci_wr_it);
}
// send CCI read response (ensure mmio disabled)
device_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
// send CCI read response (ensure mmio disabled)
device_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
device_->vcp2af_sRxPort_c0_rspValid = 1;
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
@ -368,19 +383,19 @@ private:
cci_reads_.erase(cci_rd_it);
}
}
void sTxPort_bus() {
// process read requests
if (device_->af2cp_sTxPort_c0_valid) {
assert(!device_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = device_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
cci_reads_.emplace_back(cci_req);
cci_reads_.emplace_back(cci_req);
}
// process write requests
@ -392,18 +407,18 @@ private:
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, device_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.emplace_back(cci_req);
}
}
// check queues overflow
device_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
device_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void avs_bus() {
for (int b = 0; b < MEMORY_BANKS; ++b) {
// process memory responses
device_->avs_readdatavalid[b] = 0;
if (!pending_mem_reqs_[b].empty()
device_->avs_readdatavalid[b] = 0;
if (!pending_mem_reqs_[b].empty()
&& (*pending_mem_reqs_[b].begin())->ready) {
auto mem_rd_it = pending_mem_reqs_[b].begin();
auto mem_req = *mem_rd_it;
@ -417,11 +432,11 @@ private:
// process memory requests
assert(!device_->avs_read[b] || !device_->avs_write[b]);
unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE;
if (device_->avs_write[b]) {
uint64_t byteen = device_->avs_byteenable[b];
if (device_->avs_write[b]) {
uint64_t byteen = device_->avs_byteenable[b];
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
@ -433,7 +448,7 @@ private:
printf("\n");*/
// send dram request
ramulator::Request dram_req(
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::WRITE,
0
@ -443,13 +458,13 @@ private:
if (device_->avs_read[b]) {
auto mem_req = new mem_rd_req_t();
mem_req->addr = device_->avs_address[b];
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : pending_mem_reqs_[b]) {
if (req.cycles_left != 0)
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
@ -457,7 +472,7 @@ private:
printf("}\n");*/
// send dram request
ramulator::Request dram_req(
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::READ,
std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) {
@ -473,29 +488,33 @@ private:
}
typedef struct {
bool ready;
bool ready;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
} mem_rd_req_t;
typedef struct {
int cycles_left;
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
int cycles_left;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
typedef struct {
uint64_t* data;
size_t size;
uint64_t ioaddr;
uint64_t ioaddr;
} host_buffer_t;
Vvortex_afu_shim *device_;
RAM* ram_;
ramulator::Gem5Wrapper* ramulator_;
std::future<void> future_;
bool stop_;
@ -505,18 +524,12 @@ private:
std::list<mem_rd_req_t*> pending_mem_reqs_[MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM* ram_;
ramulator::Gem5Wrapper* dram_;
std::queue<ramulator::Request> dram_queue_;
Vvortex_afu_shim *device_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif
@ -524,7 +537,7 @@ private:
///////////////////////////////////////////////////////////////////////////////
opae_sim::opae_sim()
opae_sim::opae_sim()
: impl_(new Impl())
{}
@ -532,6 +545,10 @@ opae_sim::~opae_sim() {
delete impl_;
}
int opae_sim::init() {
return impl_->init();
}
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
return impl_->prepare_buffer(len, buf_addr, wsid, flags);
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,16 +14,17 @@
#pragma once
#include <stdint.h>
namespace vortex {
class RAM;
namespace vortex {
class opae_sim {
public:
opae_sim();
virtual ~opae_sim();
int init();
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
void release_buffer(uint64_t wsid);
@ -34,10 +35,10 @@ public:
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
private:
private:
class Impl;
Impl* impl_;
Impl* impl_;
};
}

124
sim/xrtsim/Makefile Normal file
View file

@ -0,0 +1,124 @@
# Builds libxrtsim.so: Verilator compiles the vortex_afu_shim top module
# together with the C++ sources listed in SRCS into a shared library.
#
# Variables that can be overridden from the command line / environment:
#   DESTDIR  output directory (defaults to the current directory)
#   CONFIGS  extra -D defines, passed to both Verilator and the C++ compiler
#   THREADS  value given to Verilator's -j option
#   DEBUG    when defined: tracing, -g -O0 and DEBUG_LEVEL=$(DEBUG)
#   SCOPE    when defined: -DSCOPE and scope.json generation
#   PERF     when defined: -DPERF_ENABLE
include ../common.mk
DESTDIR ?= $(CURDIR)
SRC_DIR := $(VORTEX_HOME)/sim/xrtsim
AFU_DIR := $(RTL_DIR)/afu/xrt
# C++ compiler flags (handed to Verilator through -CFLAGS in the build rule)
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
# NOTE(review): the '/' between -I and $(THIRD_PARTY_DIR) on the next two
# lines looks unintended (LDFLAGS below uses the variable without it) - confirm.
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
CXXFLAGS += -DXLEN_$(XLEN)
# Linker flags: shared library output, softfloat static archive, ramulator, pthread
LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
# Control RTL debug tracing states (only reach the build through DBG_FLAGS,
# which is applied in the DEBUG branch below)
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
# Control logic analyzer monitors (always appended to VL_FLAGS below)
DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU
DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
# AFU parameters: each default below is added only when CONFIGS does not
# already mention the corresponding PLATFORM_PARAM_* name
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
endif
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
endif
# Debug-only defines: debug level, VCD output and the trace flags above
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
# C++ sources: shared simulation helpers, DPI implementations, and the
# xrtsim-specific fpga.cpp / xrt_sim.cpp
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp
# SystemVerilog package files passed explicitly to Verilator
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
FPU_INCLUDE = -I$(RTL_DIR)/fpu
# Extra packages and include paths when CONFIGS selects FPU_FPNEW
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
endif
# RTL include search path, ending with the XRT AFU directory
RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(AFU_DIR)
# Verilator top module
TOP = vortex_afu_shim
# Verilator flags
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += -DXLEN_$(XLEN)
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(SRC_DIR)/verilator.vlt
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += $(DBG_SCOPE_FLAGS)
# The same CONFIGS defines are also given to the C++ compiler
CXXFLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
# THREADS defaults to the CPU count reported by Python's multiprocessing
# module, so a `python` executable must be on PATH.
# NOTE(review): -j appears to control Verilator's build parallelism, while the
# --threads option is left commented out below - confirm the heading above.
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugging: tracing plus unoptimized C++ when DEBUG is defined,
# otherwise NDEBUG with -O3
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O3 -DNDEBUG
endif
# Enable scope analyzer; SCOPE_JSON becomes a prerequisite of the library
ifdef SCOPE
VL_FLAGS += -DSCOPE
CXXFLAGS += -DSCOPE
SCOPE_JSON = $(DESTDIR)/scope.json
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
# use our XRT shim
VL_FLAGS += -DNOXRT
CXXFLAGS += -DNOXRT
PROJECT := libxrtsim.so
all: $(DESTDIR)/$(PROJECT)
# XML description of the design produced by Verilator (input to scope.py)
$(DESTDIR)/vortex.xml:
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $@
# scope.json is generated from vortex.xml by scope.py
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml
$(SCRIPT_DIR)/scope.py $^ -o $@
# Verilate and build the shared library; intermediates go to $(DESTDIR)/obj_dir
$(DESTDIR)/$(PROJECT): $(SRCS) $(SCOPE_JSON)
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' --Mdir $(DESTDIR)/obj_dir -o $@
# Remove all generated outputs
clean:
rm -rf $(DESTDIR)/obj_dir $(DESTDIR)/vortex.xml $(DESTDIR)/scope.json $(DESTDIR)/$(PROJECT)

100
sim/xrtsim/fpga.cpp Normal file
View file

@ -0,0 +1,100 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <assert.h>
#include "fpga.h"
#include "xrt_sim.h"
#include <VX_config.h>
#include <util.h>
using namespace vortex;
#ifdef __cplusplus
extern "C" {
#endif
// Opens the simulated device. Only index 0 exists. On success the returned
// handle points to a heap-allocated xrt_sim (released by xrtDeviceClose);
// nullptr is returned for any other index or when initialization fails.
extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) {
  if (index != 0) {
    return nullptr;
  }
  xrt_sim* const device = new xrt_sim();
  if (device->init() == 0) {
    return device;
  }
  // Initialization failed: do not leak the half-built simulator.
  delete device;
  return nullptr;
}
// Reports the fixed platform name of the simulated device.
//
// name     - optional output buffer; skipped when null or when size <= 0.
// size     - capacity of `name` in bytes.
// ret_size - optional; receives the number of bytes needed to hold the full
//            name including its terminating NUL.
// Always returns 0.
extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, int* ret_size) {
  static const char deviceName[] = "vortex_xrtsim";
  // sizeof on the array counts the terminating NUL, i.e. strlen + 1.
  const int required = static_cast<int>(sizeof(deviceName));
  if (name != nullptr && size > 0) {
    // Never read past the end of deviceName, whatever capacity the caller
    // reports, and never write more than the caller's buffer can hold.
    const int count = (size < required) ? size : required;
    memcpy(name, deviceName, static_cast<size_t>(count));
    // Keep the result a valid C string even when it had to be truncated.
    name[count - 1] = '\0';
  }
  if (ret_size != nullptr) {
    *ret_size = required;
  }
  return 0;
}
// Destroys the simulator behind a handle returned by xrtDeviceOpen.
// Always returns 0.
extern int xrtDeviceClose(xrtDeviceHandle dhdl) {
  delete static_cast<xrt_sim*>(dhdl);
  return 0;
}
// Stub: performs no work and always reports success (0).
extern int xrtKernelClose(xrtKernelHandle /*kernelHandle*/)
{
  return 0;
}
// Stub: buffer allocation is not implemented; every call returns a null handle.
extern xrtBufferHandle xrtBOAlloc(xrtDeviceHandle /*dhdl*/, size_t /*size*/, xrtBufferFlags /*flags*/, xrtMemoryGroup /*grp*/) {
  return nullptr;
}
// Stub: releases nothing and always reports success (0).
extern int xrtBOFree(xrtBufferHandle /*bhdl*/) {
  return 0;
}
extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek) {
return 0;
}
extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip) {
return 0;
}
extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) {
return 0;
}
extern int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data) {
return 0;
}
extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap) {
return 0;
}
extern int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len) {
return 0;
}
#ifdef __cplusplus
}
#endif

113
sim/xrtsim/fpga.h Normal file
View file

@ -0,0 +1,113 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __FPGA_H__
#define __FPGA_H__
#include <stdint.h>
#include <uuid/uuid.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Copyright (C) 2019, Xilinx Inc - All rights reserved.
* Xilinx Runtime (XRT) APIs
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may
* not use this file except in compliance with the License. A copy of the
* License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/**
* XCL BO Flags bits layout
* bits 0 ~ 15: DDR BANK index
* bits 24 ~ 31: BO flags
*
* NOTE(review): XRT_BO_FLAGS_MEMIDX_MASK below is 0xFFFFFF, which covers
* bits 0 ~ 23 rather than the 0 ~ 15 stated above - confirm which is intended.
*/
/* Mask selecting the low bits of a flags word (the memory index part). */
#define XRT_BO_FLAGS_MEMIDX_MASK (0xFFFFFFUL)
/* Individual flag bits, one per bit position from 24 to 31. */
#define XCL_BO_FLAGS_NONE (0)
#define XCL_BO_FLAGS_CACHEABLE (1U << 24)
#define XCL_BO_FLAGS_KERNBUF (1U << 25)
#define XCL_BO_FLAGS_SGL (1U << 26)
#define XCL_BO_FLAGS_SVM (1U << 27)
#define XCL_BO_FLAGS_DEV_ONLY (1U << 28)
#define XCL_BO_FLAGS_HOST_ONLY (1U << 29)
#define XCL_BO_FLAGS_P2P (1U << 30)
#define XCL_BO_FLAGS_EXECBUF (1U << 31)
/* XRT_-prefixed aliases for a subset of the XCL_ flags above. */
#define XRT_BO_FLAGS_NONE XCL_BO_FLAGS_NONE
#define XRT_BO_FLAGS_CACHEABLE XCL_BO_FLAGS_CACHEABLE
#define XRT_BO_FLAGS_DEV_ONLY XCL_BO_FLAGS_DEV_ONLY
#define XRT_BO_FLAGS_HOST_ONLY XCL_BO_FLAGS_HOST_ONLY
#define XRT_BO_FLAGS_P2P XCL_BO_FLAGS_P2P
#define XRT_BO_FLAGS_SVM XCL_BO_FLAGS_SVM
/* Direction argument of xrtBOSync(). */
enum xclBOSyncDirection {
XCL_BO_SYNC_BO_TO_DEVICE = 0,
XCL_BO_SYNC_BO_FROM_DEVICE,
};
/* Opaque handles: all are untyped pointers at this API level. */
typedef void *xrtDeviceHandle;
typedef void *xrtKernelHandle;
typedef void* xrtXclbinHandle;
typedef void *xrtBufferHandle;
/* Scalar types used by the API signatures in this header. */
typedef uint64_t xrtErrorCode;
typedef uint64_t xrtBufferFlags;
typedef uint32_t xrtMemoryGroup;
/* UUID type, aliased from libuuid's uuid_t. */
typedef uuid_t xuid_t;
/*
 * C entry points of the simulated XRT API. The names and signatures presumably
 * mirror the subset of the Xilinx XRT native C API that the Vortex runtime
 * calls - confirm against the XRT headers before changing any of them.
 */
/* Device lifetime and identification. */
xrtDeviceHandle xrtDeviceOpen(unsigned int index);
int xrtXclbinGetXSAName(xrtDeviceHandle dhdl, char* name, int size, int* ret_size);
int xrtDeviceClose(xrtDeviceHandle dhdl);
int xrtKernelClose(xrtKernelHandle kernelHandle);
/* Buffer objects. */
xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags flags, xrtMemoryGroup grp);
int xrtBOFree(xrtBufferHandle bhdl);
int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek);
int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip);
int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset);
/* Kernel register access. */
int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data);
int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap);
/* Error reporting. */
int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len);
#ifdef __cplusplus
}
#endif
#endif // __FPGA_H__

5
sim/xrtsim/verilator.vlt Normal file
View file

@ -0,0 +1,5 @@
`verilator_config
// Lint waivers for the third-party fpnew sources (any file whose path matches
// "*/fpnew/src/*").
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
// NOTE(review): this entry names no -rule, so it presumably waives every lint
// warning for the matching files, which would make the two rule-specific
// entries above redundant - confirm against the Verilator documentation.
lint_off -file "*/fpnew/src/*"

View file

@ -0,0 +1,85 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`include "vortex_afu.vh"
// Pass-through wrapper around vortex_afu: it instantiates a single vortex_afu,
// forwards its own parameters to it and connects every port to the
// identically named port of this module.
module vortex_afu_shim #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH,
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH
) (
// System signals
input wire ap_clk,
input wire ap_rst_n,
// AXI4 master interface
// One port group per memory bank, expanded by the GEN_AXI_MEM macro
// (presumably defined in vortex_afu.vh - confirm).
`REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
// AXI4-Lite slave interface
// Write address channel
input wire s_axi_ctrl_awvalid,
output wire s_axi_ctrl_awready,
input wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0] s_axi_ctrl_awaddr,
// Write data channel
input wire s_axi_ctrl_wvalid,
output wire s_axi_ctrl_wready,
input wire [C_S_AXI_CTRL_DATA_WIDTH-1:0] s_axi_ctrl_wdata,
input wire [C_S_AXI_CTRL_DATA_WIDTH/8-1:0] s_axi_ctrl_wstrb,
// Read address channel
input wire s_axi_ctrl_arvalid,
output wire s_axi_ctrl_arready,
input wire [C_S_AXI_CTRL_ADDR_WIDTH-1:0] s_axi_ctrl_araddr,
// Read data channel
output wire s_axi_ctrl_rvalid,
input wire s_axi_ctrl_rready,
output wire [C_S_AXI_CTRL_DATA_WIDTH-1:0] s_axi_ctrl_rdata,
output wire [1:0] s_axi_ctrl_rresp,
// Write response channel
output wire s_axi_ctrl_bvalid,
input wire s_axi_ctrl_bready,
output wire [1:0] s_axi_ctrl_bresp,
// Lint warnings are suppressed around the interrupt output only.
`IGNORE_WARNINGS_BEGIN
output wire interrupt
`IGNORE_WARNINGS_END
);
// The wrapped AFU: parameters and ports are forwarded one-to-one.
vortex_afu #(
.C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH),
.C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH),
.C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH),
.C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH),
.C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH)
) afu (
.ap_clk(ap_clk),
.ap_rst_n(ap_rst_n),
// AXI4 master interface
`REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
// AXI4-Lite slave interface
.s_axi_ctrl_awvalid(s_axi_ctrl_awvalid),
.s_axi_ctrl_awready(s_axi_ctrl_awready),
.s_axi_ctrl_awaddr(s_axi_ctrl_awaddr),
.s_axi_ctrl_wvalid(s_axi_ctrl_wvalid),
.s_axi_ctrl_wready(s_axi_ctrl_wready),
.s_axi_ctrl_wdata(s_axi_ctrl_wdata),
.s_axi_ctrl_wstrb(s_axi_ctrl_wstrb),
.s_axi_ctrl_arvalid(s_axi_ctrl_arvalid),
.s_axi_ctrl_arready(s_axi_ctrl_arready),
.s_axi_ctrl_araddr(s_axi_ctrl_araddr),
.s_axi_ctrl_rvalid(s_axi_ctrl_rvalid),
.s_axi_ctrl_rready(s_axi_ctrl_rready),
.s_axi_ctrl_rdata(s_axi_ctrl_rdata),
.s_axi_ctrl_rresp(s_axi_ctrl_rresp),
.s_axi_ctrl_bvalid(s_axi_ctrl_bvalid),
.s_axi_ctrl_bready(s_axi_ctrl_bready),
.s_axi_ctrl_bresp(s_axi_ctrl_bresp),
.interrupt(interrupt)
);
endmodule

273
sim/xrtsim/xrt_sim.cpp Normal file
View file

@ -0,0 +1,273 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "xrt_sim.h"
#include <verilated.h>
#include "Vvortex_afu_shim.h"
#include "Vvortex_afu_shim__Syms.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <iostream>
#include <fstream>
#include <iomanip>
#include <mem.h>
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
#include <VX_config.h>
#include <atomic>
#include <future>
#include <list>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <util.h>
// Number of memory banks; used below as the ramulator "channels" setting.
// Falls back to the platform parameter when defined, otherwise to 2.
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
// Ratio between device clock cycles and memory model ticks (see Impl::tick):
// a positive value ticks the memory once every MEM_CYCLE_RATIO device cycles,
// a value <= 0 ticks it (-MEM_CYCLE_RATIO + 1) times per device cycle.
#ifndef MEM_CYCLE_RATIO
#define MEM_CYCLE_RATIO -1
#endif
// Memory block size in bytes, derived from the platform data width in bits.
// Any previous definition is deliberately replaced.
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
// NOTE(review): not referenced anywhere in the visible part of this file.
#define CACHE_BLOCK_SIZE 64
// Timestamp window [TRACE_START_TIME, TRACE_STOP_TIME) in which tracing is
// always on; the default stop value of -1ull is the largest 64-bit timestamp.
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
// Value passed to Verilated::randReset() in Impl::init().
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
// Page size handed to the RAM model constructor.
#define RAM_PAGE_SIZE 4096
// NOTE(review): not referenced anywhere in the visible part of this file.
#define CPU_GPU_LATENCY 200
using namespace vortex;
// Simulation time counter; advanced by one on every model evaluation.
static uint64_t timestamp = 0;

// Returns the current simulation time as a double (the time hook that
// Verilator-generated code looks up by this name).
double sc_time_stamp() {
  return static_cast<double>(timestamp);
}
// Manual trace switch, toggled through sim_trace_enable().
static bool trace_enabled = false;
// Bounds of the timestamp window in which tracing is unconditionally on.
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;

// Tracing is active while the current timestamp lies inside
// [trace_start_time, trace_stop_time); outside that window the manual
// switch decides.
bool sim_trace_enabled() {
  const bool inside_window = (timestamp >= trace_start_time)
                          && (timestamp < trace_stop_time);
  return inside_window ? true : trace_enabled;
}
// Sets the manual trace switch that sim_trace_enabled() falls back to
// outside the configured trace window.
void sim_trace_enable(bool enable)
{
  trace_enabled = enable;
}
///////////////////////////////////////////////////////////////////////////////
// Private implementation of xrt_sim.
//
// Owns the Verilated vortex_afu_shim model, a RAM model and a ramulator DRAM
// model. init() builds them, resets the device and starts a background task
// that clocks the device until the destructor asks it to stop.
class xrt_sim::Impl {
public:
  Impl()
    : device_(nullptr)
    , ram_(nullptr)
    , ramulator_(nullptr)
    , stop_(false)
#ifdef VCD_OUTPUT
    , trace_(nullptr)
#endif
  {}

  // Stops the background task, then releases everything init() created.
  ~Impl() {
    // Signal the simulation loop and wait for it to finish before the
    // objects it uses are destroyed.
    stop_.store(true);
    if (future_.valid()) {
      future_.wait();
    }
#ifdef VCD_OUTPUT
    if (trace_) {
      trace_->close();
      delete trace_;
    }
#endif
    if (device_) {
      delete device_;
    }
    if (ram_) {
      delete ram_;
    }
    if (ramulator_) {
      // Flush the DRAM model and print its statistics before deleting it.
      ramulator_->finish();
      Stats::statlist.printall();
      delete ramulator_;
    }
  }

  // Builds the models, resets the device and launches the simulation task.
  // Returns 0 (no failure path exists in this implementation).
  int init() {
    // force random values for uninitialized signals
    Verilated::randReset(VERILATOR_RESET_VALUE);
    Verilated::randSeed(50);

    // turn off assertion before reset
    Verilated::assertOn(false);

    // create RTL module instance
    device_ = new Vvortex_afu_shim();

#ifdef VCD_OUTPUT
    Verilated::traceEverOn(true);
    trace_ = new VerilatedVcdC();
    device_->trace(trace_, 99);
    trace_->open("trace.vcd");
#endif

    ram_ = new RAM(0, RAM_PAGE_SIZE);

    // initialize dram simulator
    ramulator::Config ram_config;
    ram_config.add("standard", "DDR4");
    ram_config.add("channels", std::to_string(MEMORY_BANKS));
    ram_config.add("ranks", "1");
    ram_config.add("speed", "DDR4_2400R");
    ram_config.add("org", "DDR4_4Gb_x8");
    ram_config.add("mapping", "defaultmapping");
    ram_config.set_core_num(1);
    ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
    Stats::statlist.output("ramulator.ddr4.log");

    // reset the device
    this->reset();

    // launch execution thread
    // The task only touches members of *this; the destructor waits for it,
    // so the captured pointer cannot dangle.
    future_ = std::async(std::launch::async, [this]{
      while (!stop_.load()) {
        // Each tick runs under mutex_, taken per iteration.
        std::lock_guard<std::mutex> guard(mutex_);
        this->tick();
      }
    });

    return 0;
  }

private:

  // Holds ap_rst_n low for RESET_DELAY clock cycles, then runs another
  // RESET_DELAY cycles with it released before re-enabling assertions.
  void reset() {
    device_->ap_rst_n = 0;
    for (int i = 0; i < RESET_DELAY; ++i) {
      device_->ap_clk = 0;
      this->eval();
      device_->ap_clk = 1;
      this->eval();
    }

    device_->ap_rst_n = 1;
    for (int i = 0; i < RESET_DELAY; ++i) {
      device_->ap_clk = 0;
      this->eval();
      device_->ap_clk = 1;
      this->eval();
    }

    // Turn on assertion after reset
    Verilated::assertOn(true);
  }

  // Advances the device by one clock cycle and the DRAM model accordingly.
  void tick() {
    // Hand at most one queued request to the DRAM model; it stays queued
    // when the model refuses it.
    if (!dram_queue_.empty()) {
      if (ramulator_->send(dram_queue_.front()))
        dram_queue_.pop();
    }

    device_->ap_clk = 0;
    this->eval();
    device_->ap_clk = 1;
    this->eval();

    if (MEM_CYCLE_RATIO > 0) {
      // eval() runs twice per clock cycle, hence timestamp / 2.
      auto cycle = timestamp / 2;
      if ((cycle % MEM_CYCLE_RATIO) == 0)
        ramulator_->tick();
    } else {
      // Non-positive ratio: tick the memory (-MEM_CYCLE_RATIO + 1) times.
      for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
        ramulator_->tick();
    }

#ifndef NDEBUG
    fflush(stdout);
#endif
  }

  // Evaluates the model once, dumps a trace sample when tracing is active
  // and advances the global timestamp.
  void eval() {
    device_->eval();
#ifdef VCD_OUTPUT
    if (sim_trace_enabled()) {
      trace_->dump(timestamp);
    }
#endif
    ++timestamp;
  }

  Vvortex_afu_shim *device_;
  RAM* ram_;
  ramulator::Gem5Wrapper* ramulator_;

  std::future<void> future_;
  // Written by the destructor and read by the simulation task without
  // holding mutex_, so it must be atomic: a plain bool here is a data race.
  std::atomic<bool> stop_;

  std::mutex mutex_;

  std::queue<ramulator::Request> dram_queue_;

#ifdef VCD_OUTPUT
  VerilatedVcdC *trace_;
#endif
};
///////////////////////////////////////////////////////////////////////////////
// Allocates the private implementation; nothing is simulated until init().
xrt_sim::xrt_sim() : impl_(new Impl()) {
}
// Destroys the private implementation, which stops the simulation task.
xrt_sim::~xrt_sim()
{
  delete impl_;
}
// Forwards to the implementation and returns its status code (0 on success).
int xrt_sim::init() {
  const int status = impl_->init();
  return status;
}

34
sim/xrtsim/xrt_sim.h Normal file
View file

@ -0,0 +1,34 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
namespace vortex {

// Public handle to the XRT simulator. All state lives in a privately defined
// Impl object that this class allocates and owns.
class xrt_sim {
public:
  xrt_sim();
  virtual ~xrt_sim();

  // impl_ is an owning raw pointer released in the destructor, so a copy
  // would lead to a double delete: copying is disabled.
  xrt_sim(const xrt_sim&) = delete;
  xrt_sim& operator=(const xrt_sim&) = delete;

  // Initializes the simulator; returns 0 on success.
  int init();

private:
  class Impl;
  Impl* impl_;
};

}