mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-06-27 17:01:10 -04:00
Ramulator 2.0 with HBM 2.0 support
Verilator 5.0 support SimX C++17 requirement
This commit is contained in:
parent
ca474c39b9
commit
fb141ae522
18 changed files with 377 additions and 296 deletions
2
.gitmodules
vendored
2
.gitmodules
vendored
|
@ -6,4 +6,4 @@
|
|||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator.git
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
diff --git a/Makefile b/Makefile
|
||||
index ea340c8..d2aac5b 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS))
|
||||
|
||||
# Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with
|
||||
# g++ 4.x due to an internal compiler error when processing lambda functions.
|
||||
-CXX := clang++
|
||||
+#CXX := clang++
|
||||
# CXX := g++-5
|
||||
-CXXFLAGS := -O3 -std=c++11 -g -Wall
|
||||
+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC
|
||||
|
||||
.PHONY: all clean depend
|
||||
|
||||
all: depend ramulator
|
||||
|
||||
clean:
|
||||
- rm -f ramulator
|
||||
+ rm -f ramulator libramulator.a
|
||||
rm -rf $(OBJDIR)
|
||||
|
||||
depend: $(OBJDIR)/.depend
|
||||
@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend
|
||||
$(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS)
|
||||
|
||||
libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o
|
||||
- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o
|
||||
+ $(AR) rcs $@ $^
|
||||
|
||||
$(OBJS): | $(OBJDIR)
|
||||
|
||||
diff --git a/src/Request.h b/src/Request.h
|
||||
index 57abd0d..a5ce061 100644
|
||||
--- a/src/Request.h
|
||||
+++ b/src/Request.h
|
||||
@@ -36,7 +36,7 @@ public:
|
||||
|
||||
Request(long addr, Type type, int coreid = 0)
|
||||
: is_first_command(true), addr(addr), coreid(coreid), type(type),
|
||||
- callback([](Request& req){}) {}
|
||||
+ callback([](Request&){}) {}
|
||||
|
||||
Request(long addr, Type type, function<void(Request&)> callback, int coreid = 0)
|
||||
: is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {}
|
28
ramulator_config.yaml
Normal file
28
ramulator_config.yaml
Normal file
|
@ -0,0 +1,28 @@
|
|||
Frontend:
|
||||
impl: GEM5
|
||||
|
||||
MemorySystem:
|
||||
impl: GenericDRAM
|
||||
clock_ratio: 1
|
||||
|
||||
DRAM:
|
||||
impl: HBM2
|
||||
org:
|
||||
preset: HBM2_8Gb
|
||||
density: 8192
|
||||
timing:
|
||||
preset: HBM2_2Gbps
|
||||
|
||||
Controller:
|
||||
impl: Generic
|
||||
Scheduler:
|
||||
impl: FRFCFS
|
||||
RefreshManager:
|
||||
impl: AllBank
|
||||
RowPolicy:
|
||||
impl: OpenRowPolicy
|
||||
cap: 1
|
||||
plugins:
|
||||
|
||||
AddrMapper:
|
||||
impl: RoBaRaCoCh
|
113
sim/common/dram_sim.cpp
Normal file
113
sim/common/dram_sim.cpp
Normal file
|
@ -0,0 +1,113 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dram_sim.h"
|
||||
#include "util.h"
|
||||
#include <fstream>
|
||||
|
||||
DISABLE_WARNING_PUSH
|
||||
DISABLE_WARNING_UNUSED_PARAMETER
|
||||
DISABLE_WARNING_MISSING_FIELD_INITIALIZERS
|
||||
#include <base/base.h>
|
||||
#include <base/request.h>
|
||||
#include <base/config.h>
|
||||
#include <frontend/frontend.h>
|
||||
#include <memory_system/memory_system.h>
|
||||
DISABLE_WARNING_POP
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Private implementation of DramSim. Holds the Ramulator frontend and memory
// system handles and forwards ticks and memory requests to them.
class DramSim::Impl {
private:
  Ramulator::IFrontEnd*     ramulator_frontend_ = nullptr;
  Ramulator::IMemorySystem* ramulator_memorysystem_ = nullptr;

public:
  // Builds the Ramulator configuration in memory, creates the frontend and the
  // memory system from it, and connects the two to each other.
  //
  // clock_ratio is stored verbatim under MemorySystem.clock_ratio.
  Impl(int clock_ratio) {
    YAML::Node dram_config;
    dram_config["Frontend"]["impl"] = "GEM5";
    dram_config["MemorySystem"]["impl"] = "GenericDRAM";
    dram_config["MemorySystem"]["clock_ratio"] = clock_ratio;
    dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
    dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
    dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
    dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
    dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
    dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
    // Set once; the previous code assigned this key twice with the same value.
    dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
    dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
    {
      // Controller plugin entry that records a trace to a fixed relative path.
      // NOTE(review): presumably ./trace must already exist - confirm.
      YAML::Node trace_plugin;
      trace_plugin["ControllerPlugin"]["impl"] = "TraceRecorder";
      trace_plugin["ControllerPlugin"]["path"] = "./trace/ramulator.log";
      dram_config["MemorySystem"]["Controller"]["plugins"].push_back(trace_plugin);
    }
    dram_config["MemorySystem"]["AddrMapper"]["impl"] = "RoBaRaCoCh";

    ramulator_frontend_ = Ramulator::Factory::create_frontend(dram_config);
    ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config);
    ramulator_frontend_->connect_memory_system(ramulator_memorysystem_);
    ramulator_memorysystem_->connect_frontend(ramulator_frontend_);
  }

  // Finalizes both Ramulator components while std::cout is temporarily
  // redirected into "ramulator.stats.log", then restores cout's buffer.
  // Presumably finalize() prints its statistics to std::cout - confirm.
  // NOTE(review): the frontend and memory system objects are not deleted
  // here; confirm who owns them.
  ~Impl() {
    std::ofstream stats_file("ramulator.stats.log");
    auto original_buf = std::cout.rdbuf();
    std::cout.rdbuf(stats_file.rdbuf());
    ramulator_frontend_->finalize();
    ramulator_memorysystem_->finalize();
    // Restore before stats_file (and its buffer) goes out of scope.
    std::cout.rdbuf(original_buf);
  }

  // Intentionally a no-op: nothing is reset on the Ramulator side.
  void reset() {
    //--
  }

  // Advances the memory system by one tick.
  void tick() {
    ramulator_memorysystem_->tick();
  }

  // Hands one read or write request to the Ramulator frontend.
  //
  // callback(arg) is invoked from the completion lambda passed to the
  // frontend. Returns whatever receive_external_requests() returns; callers
  // in this project treat `true` as "request accepted".
  bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
    const auto type = is_write ? Ramulator::Request::Type::Write
                               : Ramulator::Request::Type::Read;
    return ramulator_frontend_->receive_external_requests(
      type,
      addr,
      source_id,
      // Both captures are plain pointers, so copying them is all that is needed.
      [callback, arg](Ramulator::Request& /*dram_req*/) {
        callback(arg);
      }
    );
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates the simulator facade; all state lives in a heap-allocated Impl
// constructed with the given clock ratio.
DramSim::DramSim(int clock_ratio) : impl_(new Impl(clock_ratio)) {
}
|
||||
|
||||
// Destroys the implementation object allocated by the constructor.
DramSim::~DramSim() {
  Impl* const owned = impl_;
  delete owned;
}
|
||||
|
||||
void DramSim::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void DramSim::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
bool DramSim::send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
|
||||
return impl_->send_request(is_write, addr, source_id, callback, arg);
|
||||
}
|
36
sim/common/dram_sim.h
Normal file
36
sim/common/dram_sim.h
Normal file
|
@ -0,0 +1,36 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once

#include <stdint.h>

namespace vortex {

// Facade over the DRAM simulator. The implementation is hidden behind an
// opaque Impl pointer so that users of this header do not need the
// simulator's own headers.
class DramSim {
public:
  // Completion callback; it is invoked with the `arg` pointer that was
  // passed to send_request().
  typedef void (*ResponseCallback)(void *arg);

  // Creates the simulator; clock_ratio is forwarded to the implementation.
  DramSim(int clock_ratio);
  ~DramSim();

  // impl_ is an owning raw pointer that the destructor deletes, so an
  // implicit copy would delete the same object twice. Copying is disabled.
  DramSim(const DramSim&) = delete;
  DramSim& operator=(const DramSim&) = delete;

  // Forwards a reset request to the implementation.
  void reset();

  // Advances the simulated memory by one tick.
  void tick();

  // Submits a read (is_write == false) or write (is_write == true) request
  // for `addr` on behalf of `source_id`. `callback(arg)` is called on
  // completion. Callers in this project treat a `true` result as "request
  // accepted" and retry later otherwise.
  bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg);

private:
  class Impl;
  Impl* impl_;
};

}
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -34,6 +34,7 @@ const char* fileExtension(const char* filepath);
|
|||
#define DISABLE_WARNING_UNREFERENCED_FUNCTION __pragma(warning(disable : 4505))
|
||||
#define DISABLE_WARNING_ANONYMOUS_STRUCT __pragma(warning(disable : 4201))
|
||||
#define DISABLE_WARNING_UNUSED_VARIABLE __pragma(warning(disable : 4189))
|
||||
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS __pragma(warning(disable : 4351))
|
||||
#elif defined(__GNUC__)
|
||||
#define DISABLE_WARNING_PUSH _Pragma("GCC diagnostic push")
|
||||
#define DISABLE_WARNING_POP _Pragma("GCC diagnostic pop")
|
||||
|
@ -45,6 +46,8 @@ const char* fileExtension(const char* filepath);
|
|||
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
|
||||
#define DISABLE_WARNING_UNUSED_VARIABLE \
|
||||
_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")
|
||||
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \
|
||||
_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
|
||||
#elif defined(__clang__)
|
||||
#define DISABLE_WARNING_PUSH _Pragma("clang diagnostic push")
|
||||
#define DISABLE_WARNING_POP _Pragma("clang diagnostic pop")
|
||||
|
@ -56,6 +59,8 @@ const char* fileExtension(const char* filepath);
|
|||
_Pragma("clang diagnostic ignored \"-Wgnu-anonymous-struct\"")
|
||||
#define DISABLE_WARNING_UNUSED_VARIABLE \
|
||||
_Pragma("clang diagnostic ignored \"-Wunused-but-set-variable\"")
|
||||
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \
|
||||
_Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"")
|
||||
#else
|
||||
#define DISABLE_WARNING_PUSH
|
||||
#define DISABLE_WARNING_POP
|
||||
|
|
|
@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR)
|
|||
SRC_DIR := $(VORTEX_HOME)/sim/opaesim
|
||||
AFU_DIR := $(RTL_DIR)/afu/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
|
||||
|
@ -47,7 +49,7 @@ endif
|
|||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp
|
||||
|
||||
|
@ -65,7 +67,7 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
|
|||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
|
|
|
@ -26,10 +26,7 @@
|
|||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
#include <dram_sim.h>
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <vortex_afu.h>
|
||||
|
@ -48,8 +45,8 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CYCLE_RATIO
|
||||
#define MEM_CYCLE_RATIO -1
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
|
@ -108,7 +105,7 @@ public:
|
|||
Impl()
|
||||
: device_(nullptr)
|
||||
, ram_(nullptr)
|
||||
, ramulator_(nullptr)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
, stop_(false)
|
||||
, host_buffer_ids_(0)
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -136,11 +133,6 @@ public:
|
|||
if (ram_) {
|
||||
delete ram_;
|
||||
}
|
||||
if (ramulator_) {
|
||||
ramulator_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete ramulator_;
|
||||
}
|
||||
}
|
||||
|
||||
int init() {
|
||||
|
@ -163,18 +155,7 @@ public:
|
|||
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// initialize dram simulator
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(MEMORY_BANKS));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(1);
|
||||
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
|
@ -185,7 +166,7 @@ public:
|
|||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
#endif
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
@ -272,6 +253,15 @@ private:
|
|||
this->cci_bus_reset();
|
||||
this->avs_bus_reset();
|
||||
|
||||
for (auto& reqs : pending_mem_reqs_) {
|
||||
reqs.clear();
|
||||
}
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_, empty);
|
||||
}
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
|
@ -299,8 +289,17 @@ private:
|
|||
this->avs_bus_eval();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
if (ramulator_->send(dram_queue_.front()))
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
device_->clk = 0;
|
||||
|
@ -308,14 +307,7 @@ private:
|
|||
device_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
if ((cycle % MEM_CYCLE_RATIO) == 0)
|
||||
ramulator_->tick();
|
||||
} else {
|
||||
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
|
||||
ramulator_->tick();
|
||||
}
|
||||
dram_sim_.tick();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
|
@ -470,17 +462,18 @@ private:
|
|||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
|
||||
dram_queue_.push(mem_req);
|
||||
} else
|
||||
if (device_->avs_read[b]) {
|
||||
auto mem_req = new mem_rd_req_t();
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
|
@ -494,15 +487,7 @@ private:
|
|||
printf("}\n");*/
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
|
||||
device_->avs_waitrequest[b] = false;
|
||||
|
@ -510,10 +495,11 @@ private:
|
|||
}
|
||||
|
||||
typedef struct {
|
||||
bool ready;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
bool write;
|
||||
bool ready;
|
||||
} mem_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
|
@ -535,7 +521,7 @@ private:
|
|||
|
||||
Vvortex_afu_shim *device_;
|
||||
RAM* ram_;
|
||||
ramulator::Gem5Wrapper* ramulator_;
|
||||
DramSim dram_sim_;
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
@ -543,14 +529,14 @@ private:
|
|||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t*> pending_mem_reqs_[MEMORY_BANKS];
|
||||
std::list<mem_req_t*> pending_mem_reqs_[MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
std::queue<mem_req_t*> dram_queue_;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
|
|
|
@ -4,15 +4,17 @@ DESTDIR ?= $(CURDIR)
|
|||
|
||||
SRC_DIR = $(VORTEX_HOME)/sim/rtlsim
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw -I$(COMMON_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
|
||||
|
@ -33,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
|||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp
|
||||
|
||||
|
@ -46,7 +48,7 @@ endif
|
|||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += $(SRC_DIR)/verilator.vlt
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
|
|
|
@ -40,10 +40,7 @@
|
|||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
#include <dram_sim.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
|
@ -53,8 +50,8 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CYCLE_RATIO
|
||||
#define MEM_CYCLE_RATIO -1
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
|
@ -109,7 +106,7 @@ void sim_trace_enable(bool enable) {
|
|||
|
||||
class Processor::Impl {
|
||||
public:
|
||||
Impl() {
|
||||
Impl() : dram_sim_(MEM_CLOCK_RATIO) {
|
||||
// force random values for uninitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
@ -133,18 +130,7 @@ public:
|
|||
|
||||
ram_ = nullptr;
|
||||
|
||||
// initialize dram simulator
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(MEMORY_BANKS));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(1);
|
||||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
|
@ -155,7 +141,7 @@ public:
|
|||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
#endif
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
@ -172,12 +158,6 @@ public:
|
|||
#endif
|
||||
|
||||
delete device_;
|
||||
|
||||
if (dram_) {
|
||||
dram_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete dram_;
|
||||
}
|
||||
}
|
||||
|
||||
void cout_flush() {
|
||||
|
@ -237,6 +217,11 @@ private:
|
|||
|
||||
pending_mem_reqs_.clear();
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_, empty);
|
||||
}
|
||||
|
||||
mem_rd_rsp_active_ = false;
|
||||
mem_wr_rsp_active_ = false;
|
||||
|
||||
|
@ -280,18 +265,20 @@ private:
|
|||
#endif
|
||||
this->dcr_bus_eval(1);
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
if ((cycle % MEM_CYCLE_RATIO) == 0)
|
||||
dram_->tick();
|
||||
} else {
|
||||
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
|
||||
dram_->tick();
|
||||
}
|
||||
dram_sim_.tick();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
if (dram_->send(dram_queue_.front()))
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
@ -337,7 +324,7 @@ private:
|
|||
|
||||
// process memory responses
|
||||
if (mem_rd_rsp_active_
|
||||
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
|
||||
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
|
@ -347,7 +334,7 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
printf("%0ld: [sim] MEM Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
|
@ -368,7 +355,7 @@ private:
|
|||
|
||||
// send memory write response
|
||||
if (mem_wr_rsp_active_
|
||||
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
|
||||
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
|
@ -378,7 +365,7 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr);
|
||||
*/
|
||||
device_->m_axi_bvalid[0] = 1;
|
||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||
|
@ -433,16 +420,11 @@ private:
|
|||
mem_req->tag = device_->m_axi_awid[0];
|
||||
mem_req->addr = device_->m_axi_awaddr[0];
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_awaddr[0],
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
|
@ -455,15 +437,7 @@ private:
|
|||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_araddr[0],
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -502,7 +476,7 @@ private:
|
|||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, tag=%0lx, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->tag, mem_rsp->addr);
|
||||
printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
|
@ -554,13 +528,14 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag;
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
|
@ -575,15 +550,7 @@ private:
|
|||
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -614,11 +581,12 @@ private:
|
|||
private:
|
||||
|
||||
typedef struct {
|
||||
bool ready;
|
||||
VVortex *device;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> block;
|
||||
uint64_t addr;
|
||||
uint64_t tag;
|
||||
bool write;
|
||||
bool ready;
|
||||
} mem_req_t;
|
||||
|
||||
#ifdef AXI_BUS
|
||||
|
@ -642,9 +610,9 @@ private:
|
|||
|
||||
RAM *ram_;
|
||||
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
DramSim dram_sim_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
std::queue<mem_req_t*> dram_queue_;
|
||||
|
||||
bool running_;
|
||||
};
|
||||
|
|
|
@ -4,18 +4,20 @@ DESTDIR ?= $(CURDIR)
|
|||
|
||||
SRC_DIR = $(VORTEX_HOME)/sim/simx
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(SRC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
|
||||
# Debugging
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#define RAM_PAGE_SIZE 4096
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CYCLE_RATIO
|
||||
#define MEM_CYCLE_RATIO -1
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,14 +15,7 @@
|
|||
#include <vector>
|
||||
#include <queue>
|
||||
#include <stdlib.h>
|
||||
|
||||
DISABLE_WARNING_PUSH
|
||||
DISABLE_WARNING_UNUSED_PARAMETER
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
DISABLE_WARNING_POP
|
||||
#include <dram_sim.h>
|
||||
|
||||
#include "constants.h"
|
||||
#include "types.h"
|
||||
|
@ -34,80 +27,68 @@ class MemSim::Impl {
|
|||
private:
|
||||
MemSim* simobject_;
|
||||
Config config_;
|
||||
DramSim dram_sim_;
|
||||
PerfStats perf_stats_;
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
struct DramCallbackArgs {
|
||||
MemSim* simobject;
|
||||
MemReq request;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
Impl(MemSim* simobject, const Config& config)
|
||||
Impl(MemSim* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
{
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(config.channels));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(config.num_cores);
|
||||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
}
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
{}
|
||||
|
||||
~Impl() {
|
||||
dram_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete dram_;
|
||||
//--
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
|
||||
if (req.type == ramulator::Request::Type::WRITE)
|
||||
return;
|
||||
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
|
||||
simobject_->MemRspPort.push(mem_rsp, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_rsp);
|
||||
}
|
||||
|
||||
void reset() {
|
||||
perf_stats_ = PerfStats();
|
||||
dram_sim_.reset();
|
||||
}
|
||||
|
||||
void tick() {
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = SimPlatform::instance().cycles();
|
||||
if ((cycle % MEM_CYCLE_RATIO) == 0)
|
||||
dram_->tick();
|
||||
} else {
|
||||
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
|
||||
dram_->tick();
|
||||
}
|
||||
|
||||
dram_sim_.tick();
|
||||
|
||||
if (simobject_->MemReqPort.empty())
|
||||
return;
|
||||
|
||||
|
||||
auto& mem_req = simobject_->MemReqPort.front();
|
||||
|
||||
ramulator::Request dram_req(
|
||||
// try to enqueue the request to the memory system
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
mem_req.addr,
|
||||
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
|
||||
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
|
||||
mem_req.cid
|
||||
mem_req.cid,
|
||||
[](void* arg) {
|
||||
auto dram_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
||||
if (dram_args->request.write)
|
||||
return; // write's responses are not handled
|
||||
MemRsp mem_rsp{dram_args->request.tag, dram_args->request.cid, dram_args->request.uuid};
|
||||
dram_args->simobject->MemRspPort.push(mem_rsp, 1);
|
||||
DT(3, dram_args->simobject->name() << "-" << mem_rsp);
|
||||
delete dram_args;
|
||||
},
|
||||
new DramCallbackArgs{simobject_, mem_req}
|
||||
);
|
||||
|
||||
if (!dram_->send(dram_req))
|
||||
// check if the request was enqueued successfully
|
||||
if (!enqueue_success)
|
||||
return;
|
||||
|
||||
|
||||
if (mem_req.write) {
|
||||
++perf_stats_.writes;
|
||||
} else {
|
||||
++perf_stats_.reads;
|
||||
}
|
||||
|
||||
|
||||
DT(3, simobject_->name() << "-" << mem_req);
|
||||
|
||||
simobject_->MemReqPort.pop();
|
||||
|
@ -116,9 +97,9 @@ public:
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<MemSim>(ctx, name)
|
||||
, MemReqPort(this)
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
|
|
@ -70,6 +70,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
--perf_mem_pending_reads_;
|
||||
});
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << arch.num_threads()
|
||||
|
@ -80,7 +81,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
<< ", local_mem_base=0x" << std::hex << arch.local_mem_base() << std::dec
|
||||
<< ", num_barriers=" << arch.num_barriers()
|
||||
<< std::endl;
|
||||
|
||||
#endif
|
||||
// reset the device
|
||||
this->reset();
|
||||
}
|
||||
|
||||
|
|
|
@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR)
|
|||
SRC_DIR := $(VORTEX_HOME)/sim/xrtsim
|
||||
AFU_DIR := $(RTL_DIR)/afu/xrt
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
|
||||
|
@ -47,7 +49,7 @@ endif
|
|||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp
|
||||
|
||||
|
@ -64,7 +66,7 @@ RTL_INCLUDE += -I$(AFU_DIR)
|
|||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
|
|
|
@ -26,10 +26,7 @@
|
|||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
#include <dram_sim.h>
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <future>
|
||||
|
@ -46,8 +43,8 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CYCLE_RATIO
|
||||
#define MEM_CYCLE_RATIO -1
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
|
@ -101,7 +98,7 @@ public:
|
|||
Impl()
|
||||
: device_(nullptr)
|
||||
, ram_(nullptr)
|
||||
, ramulator_(nullptr)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
, stop_(false)
|
||||
#ifdef VCD_OUTPUT
|
||||
, trace_(nullptr)
|
||||
|
@ -125,11 +122,6 @@ public:
|
|||
if (ram_) {
|
||||
delete ram_;
|
||||
}
|
||||
if (ramulator_) {
|
||||
ramulator_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete ramulator_;
|
||||
}
|
||||
}
|
||||
|
||||
int init() {
|
||||
|
@ -152,18 +144,7 @@ public:
|
|||
|
||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||
|
||||
// initialize dram simulator
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(MEMORY_BANKS));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(1);
|
||||
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
|
@ -174,7 +155,7 @@ public:
|
|||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
#endif
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
@ -195,6 +176,15 @@ private:
|
|||
this->axi_ctrl_bus_reset();
|
||||
this->axi_mem_bus_reset();
|
||||
|
||||
for (auto& reqs : pending_mem_reqs_) {
|
||||
reqs.clear();
|
||||
}
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_, empty);
|
||||
}
|
||||
|
||||
device_->ap_rst_n = 0;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
|
@ -222,8 +212,17 @@ private:
|
|||
this->axi_mem_bus_eval();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
if (ramulator_->send(dram_queue_.front()))
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
device_->ap_clk = 0;
|
||||
|
@ -231,14 +230,7 @@ private:
|
|||
device_->ap_clk = 1;
|
||||
this->eval();
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
if ((cycle % MEM_CYCLE_RATIO) == 0)
|
||||
ramulator_->tick();
|
||||
} else {
|
||||
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
|
||||
ramulator_->tick();
|
||||
}
|
||||
dram_sim_.tick();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
|
@ -307,16 +299,25 @@ private:
|
|||
//--
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
bool write;
|
||||
bool ready;
|
||||
} mem_req_t;
|
||||
|
||||
Vvortex_afu_shim *device_;
|
||||
RAM* ram_;
|
||||
ramulator::Gem5Wrapper* ramulator_;
|
||||
DramSim dram_sim_;
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
std::list<mem_req_t*> pending_mem_reqs_[MEMORY_BANKS];
|
||||
|
||||
std::queue<mem_req_t*> dram_queue_;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
|
|
5
third_party/Makefile
vendored
5
third_party/Makefile
vendored
|
@ -6,11 +6,10 @@ softfloat:
|
|||
SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC
|
||||
|
||||
ramulator:
|
||||
cd ramulator && git apply ../../miscs/patches/ramulator.patch 2> /dev/null; true
|
||||
$(MAKE) -C ramulator libramulator.a
|
||||
cd ramulator && mkdir -p build && cd build && cmake .. && make -j
|
||||
|
||||
clean:
|
||||
$(MAKE) -C softfloat/build/Linux-x86_64-GCC clean
|
||||
$(MAKE) -C ramulator clean
|
||||
rm -rf ramulator/build
|
||||
|
||||
.PHONY: all fpnew softfloat ramulator
|
2
third_party/ramulator
vendored
2
third_party/ramulator
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 214f635845214adf030367939655d172ef0fed5f
|
||||
Subproject commit e62c84a6f0e06566ba6e182d308434b4532068a5
|
Loading…
Add table
Add a link
Reference in a new issue