Ramulator 2.0 with HBM 2.0 support

Verilator 5.0 support
SimX C++17 requirement
This commit is contained in:
Blaise Tine 2024-07-21 06:57:13 -07:00
parent ca474c39b9
commit fb141ae522
18 changed files with 377 additions and 296 deletions

2
.gitmodules vendored
View file

@ -6,4 +6,4 @@
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
[submodule "third_party/ramulator"]
path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator.git
url = https://github.com/CMU-SAFARI/ramulator2.git

View file

@ -1,46 +0,0 @@
diff --git a/Makefile b/Makefile
index ea340c8..d2aac5b 100644
--- a/Makefile
+++ b/Makefile
@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS))
# Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with
# g++ 4.x due to an internal compiler error when processing lambda functions.
-CXX := clang++
+#CXX := clang++
# CXX := g++-5
-CXXFLAGS := -O3 -std=c++11 -g -Wall
+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC
.PHONY: all clean depend
all: depend ramulator
clean:
- rm -f ramulator
+ rm -f ramulator libramulator.a
rm -rf $(OBJDIR)
depend: $(OBJDIR)/.depend
@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend
$(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS)
libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o
- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o
+ $(AR) rcs $@ $^
$(OBJS): | $(OBJDIR)
diff --git a/src/Request.h b/src/Request.h
index 57abd0d..a5ce061 100644
--- a/src/Request.h
+++ b/src/Request.h
@@ -36,7 +36,7 @@ public:
Request(long addr, Type type, int coreid = 0)
: is_first_command(true), addr(addr), coreid(coreid), type(type),
- callback([](Request& req){}) {}
+ callback([](Request&){}) {}
Request(long addr, Type type, function<void(Request&)> callback, int coreid = 0)
: is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {}

28
ramulator_config.yaml Normal file
View file

@ -0,0 +1,28 @@
# Ramulator configuration: a GEM5 frontend driving a GenericDRAM memory system
# that models an HBM2 device.
# NOTE(review): leading indentation is not visible in this view; the grouping
# described in the comments below is presumed from key order -- confirm
# against the checked-in file.
Frontend:
impl: GEM5
MemorySystem:
impl: GenericDRAM
clock_ratio: 1
# DRAM device model: HBM2 with organization and timing presets.
DRAM:
impl: HBM2
org:
preset: HBM2_8Gb
density: 8192
timing:
preset: HBM2_2Gbps
# Memory controller: scheduler, refresh manager and row policy selections.
Controller:
impl: Generic
Scheduler:
impl: FRFCFS
RefreshManager:
impl: AllBank
RowPolicy:
impl: OpenRowPolicy
cap: 1
# No plugin entries follow this key in the file (presumably an empty list).
plugins:
# Address mapping scheme.
AddrMapper:
impl: RoBaRaCoCh

113
sim/common/dram_sim.cpp Normal file
View file

@ -0,0 +1,113 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dram_sim.h"
#include "util.h"
#include <fstream>
DISABLE_WARNING_PUSH
DISABLE_WARNING_UNUSED_PARAMETER
DISABLE_WARNING_MISSING_FIELD_INITIALIZERS
#include <base/base.h>
#include <base/request.h>
#include <base/config.h>
#include <frontend/frontend.h>
#include <memory_system/memory_system.h>
DISABLE_WARNING_POP
using namespace vortex;
/// Private implementation behind DramSim.
///
/// Builds a Ramulator configuration in memory, creates the frontend and
/// memory-system objects from it, connects them to each other, and forwards
/// ticks and requests to them.
class DramSim::Impl {
private:
  Ramulator::IFrontEnd*     ramulator_frontend_;
  Ramulator::IMemorySystem* ramulator_memorysystem_;

public:
  /// Assembles the configuration tree and instantiates the simulator objects.
  /// @param clock_ratio value stored under MemorySystem.clock_ratio.
  Impl(int clock_ratio) {
    YAML::Node dram_config;
    dram_config["Frontend"]["impl"] = "GEM5";
    dram_config["MemorySystem"]["impl"] = "GenericDRAM";
    dram_config["MemorySystem"]["clock_ratio"] = clock_ratio;
    dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
    dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
    dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
    dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
    dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
    dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
    dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
    dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
    {
      // Register a single controller plugin: a trace recorder writing to
      // ./trace/ramulator.log.
      YAML::Node trace_plugin;
      trace_plugin["ControllerPlugin"]["impl"] = "TraceRecorder";
      trace_plugin["ControllerPlugin"]["path"] = "./trace/ramulator.log";
      dram_config["MemorySystem"]["Controller"]["plugins"].push_back(trace_plugin);
    }
    dram_config["MemorySystem"]["AddrMapper"]["impl"] = "RoBaRaCoCh";

    ramulator_frontend_ = Ramulator::Factory::create_frontend(dram_config);
    ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config);

    // Each side needs a handle to the other before requests can flow.
    ramulator_frontend_->connect_memory_system(ramulator_memorysystem_);
    ramulator_memorysystem_->connect_frontend(ramulator_frontend_);
  }

  /// Finalizes both simulator objects. While the finalize() calls run,
  /// std::cout is pointed at "ramulator.stats.log", so whatever they print to
  /// std::cout lands in that file; the original buffer is restored afterwards.
  /// NOTE(review): the frontend and memory-system objects are not deleted
  /// here -- confirm who owns what Ramulator::Factory returns.
  ~Impl() {
    std::ofstream stats_log("ramulator.stats.log");
    auto original_buf = std::cout.rdbuf();
    std::cout.rdbuf(stats_log.rdbuf());
    ramulator_frontend_->finalize();
    ramulator_memorysystem_->finalize();
    std::cout.rdbuf(original_buf);
  }

  /// Currently a no-op: no simulator state is cleared.
  void reset() {
    //--
  }

  /// Advances the memory system by one tick.
  void tick() {
    ramulator_memorysystem_->tick();
  }

  /// Hands one request to the frontend.
  /// @param is_write  selects Request::Type::Write, otherwise Read.
  /// @param addr      request address.
  /// @param source_id forwarded unchanged to the frontend.
  /// @param callback  invoked with @p arg from the completion lambda.
  /// @param arg       opaque pointer passed back to @p callback.
  /// @return the frontend's receive_external_requests() result.
  bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
    const auto req_type = is_write ? Ramulator::Request::Type::Write
                                   : Ramulator::Request::Type::Read;
    // Both captures are plain pointers, so they are copied by value.
    return ramulator_frontend_->receive_external_requests(
      req_type,
      addr,
      source_id,
      [callback, arg](Ramulator::Request& /*dram_req*/) {
        callback(arg);
      }
    );
  }
};
///////////////////////////////////////////////////////////////////////////////
// Constructs the simulator facade by heap-allocating its private Impl,
// which receives clock_ratio unchanged.
DramSim::DramSim(int clock_ratio)
: impl_(new Impl(clock_ratio))
{}
// Releases the Impl allocated in the constructor; Impl's destructor
// finalizes the underlying simulator objects.
DramSim::~DramSim() {
delete impl_;
}
// Forwards to Impl::reset(), which currently does nothing.
void DramSim::reset() {
impl_->reset();
}
// Advances the DRAM model by one tick (forwards to Impl::tick()).
void DramSim::tick() {
impl_->tick();
}
// Submits a read or write request to the DRAM model. `callback` is invoked
// with `arg` from the completion lambda that Impl::send_request hands to the
// frontend. Returns Impl::send_request's result unchanged.
bool DramSim::send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
return impl_->send_request(is_write, addr, source_id, callback, arg);
}

36
sim/common/dram_sim.h Normal file
View file

@ -0,0 +1,36 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>

namespace vortex {

/// Facade over the DRAM timing simulator. The simulator-specific types live
/// in the private Impl class so that including this header does not pull in
/// the simulator's own headers.
class DramSim {
public:
  /// Completion callback; receives the opaque `arg` given to send_request().
  typedef void (*ResponseCallback)(void *arg);

  /// @param clock_ratio passed through to the implementation.
  DramSim(int clock_ratio);
  ~DramSim();

  // Non-copyable: impl_ is a raw owning pointer released by the destructor,
  // so an implicit copy would lead to a double delete.
  DramSim(const DramSim&) = delete;
  DramSim& operator=(const DramSim&) = delete;

  /// Resets the simulator facade.
  void reset();

  /// Advances the DRAM model by one tick.
  void tick();

  /// Submits a request to the DRAM model.
  /// @param is_write  true for a write, false for a read.
  /// @param addr      request address.
  /// @param source_id identifier of the requester.
  /// @param callback  called with @p arg on completion.
  /// @param arg       opaque pointer handed back to @p callback.
  /// @return true when the request was accepted; callers keep the request
  ///         queued and retry when this returns false.
  bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg);

private:
  class Impl;
  Impl* impl_;
};

}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -34,6 +34,7 @@ const char* fileExtension(const char* filepath);
#define DISABLE_WARNING_UNREFERENCED_FUNCTION __pragma(warning(disable : 4505))
#define DISABLE_WARNING_ANONYMOUS_STRUCT __pragma(warning(disable : 4201))
#define DISABLE_WARNING_UNUSED_VARIABLE __pragma(warning(disable : 4189))
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS __pragma(warning(disable : 4351))
#elif defined(__GNUC__)
#define DISABLE_WARNING_PUSH _Pragma("GCC diagnostic push")
#define DISABLE_WARNING_POP _Pragma("GCC diagnostic pop")
@ -45,6 +46,8 @@ const char* fileExtension(const char* filepath);
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#define DISABLE_WARNING_UNUSED_VARIABLE \
_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \
_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
#elif defined(__clang__)
#define DISABLE_WARNING_PUSH _Pragma("clang diagnostic push")
#define DISABLE_WARNING_POP _Pragma("clang diagnostic pop")
@ -56,6 +59,8 @@ const char* fileExtension(const char* filepath);
_Pragma("clang diagnostic ignored \"-Wgnu-anonymous-struct\"")
#define DISABLE_WARNING_UNUSED_VARIABLE \
_Pragma("clang diagnostic ignored \"-Wunused-but-set-variable\"")
#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \
_Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"")
#else
#define DISABLE_WARNING_PUSH
#define DISABLE_WARNING_POP

View file

@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR)
SRC_DIR := $(VORTEX_HOME)/sim/opaesim
AFU_DIR := $(RTL_DIR)/afu/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
CXXFLAGS += -DXLEN_$(XLEN)
LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
@ -47,7 +49,7 @@ endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp
@ -65,7 +67,7 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
TOP = vortex_afu_shim
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += -DXLEN_$(XLEN)

View file

@ -26,10 +26,7 @@
#include <iomanip>
#include <mem.h>
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
#include <dram_sim.h>
#include <VX_config.h>
#include <vortex_afu.h>
@ -48,8 +45,8 @@
#endif
#endif
#ifndef MEM_CYCLE_RATIO
#define MEM_CYCLE_RATIO -1
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
#endif
#undef MEM_BLOCK_SIZE
@ -108,7 +105,7 @@ public:
Impl()
: device_(nullptr)
, ram_(nullptr)
, ramulator_(nullptr)
, dram_sim_(MEM_CLOCK_RATIO)
, stop_(false)
, host_buffer_ids_(0)
#ifdef VCD_OUTPUT
@ -136,11 +133,6 @@ public:
if (ram_) {
delete ram_;
}
if (ramulator_) {
ramulator_->finish();
Stats::statlist.printall();
delete ramulator_;
}
}
int init() {
@ -163,18 +155,7 @@ public:
ram_ = new RAM(0, RAM_PAGE_SIZE);
// initialize dram simulator
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(MEMORY_BANKS));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(1);
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
#ifndef NDEBUG
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
@ -185,7 +166,7 @@ public:
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
#endif
// reset the device
this->reset();
@ -272,6 +253,15 @@ private:
this->cci_bus_reset();
this->avs_bus_reset();
for (auto& reqs : pending_mem_reqs_) {
reqs.clear();
}
{
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
}
device_->reset = 1;
for (int i = 0; i < RESET_DELAY; ++i) {
@ -299,8 +289,17 @@ private:
this->avs_bus_eval();
if (!dram_queue_.empty()) {
if (ramulator_->send(dram_queue_.front()))
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queue_.pop();
}
}
device_->clk = 0;
@ -308,14 +307,7 @@ private:
device_->clk = 1;
this->eval();
if (MEM_CYCLE_RATIO > 0) {
auto cycle = timestamp / 2;
if ((cycle % MEM_CYCLE_RATIO) == 0)
ramulator_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
ramulator_->tick();
}
dram_sim_.tick();
#ifndef NDEBUG
fflush(stdout);
@ -470,17 +462,18 @@ private:
printf("\n");*/
// send dram request
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::WRITE,
0
);
dram_queue_.push(dram_req);
auto mem_req = new mem_req_t();
mem_req->addr = device_->avs_address[b];
mem_req->write = true;
mem_req->ready = true;
dram_queue_.push(mem_req);
} else
if (device_->avs_read[b]) {
auto mem_req = new mem_rd_req_t();
auto mem_req = new mem_req_t();
mem_req->addr = device_->avs_address[b];
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_[b].emplace_back(mem_req);
@ -494,15 +487,7 @@ private:
printf("}\n");*/
// send dram request
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::READ,
std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) {
mem_req->ready = true;
}, placeholders::_1, mem_req),
0
);
dram_queue_.push(dram_req);
dram_queue_.push(mem_req);
}
device_->avs_waitrequest[b] = false;
@ -510,10 +495,11 @@ private:
}
typedef struct {
bool ready;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
} mem_rd_req_t;
bool write;
bool ready;
} mem_req_t;
typedef struct {
int cycles_left;
@ -535,7 +521,7 @@ private:
Vvortex_afu_shim *device_;
RAM* ram_;
ramulator::Gem5Wrapper* ramulator_;
DramSim dram_sim_;
std::future<void> future_;
bool stop_;
@ -543,14 +529,14 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_;
std::list<mem_rd_req_t*> pending_mem_reqs_[MEMORY_BANKS];
std::list<mem_req_t*> pending_mem_reqs_[MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
std::queue<ramulator::Request> dram_queue_;
std::queue<mem_req_t*> dram_queue_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;

View file

@ -4,15 +4,17 @@ DESTDIR ?= $(CURDIR)
SRC_DIR = $(VORTEX_HOME)/sim/rtlsim
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(ROOT_DIR)/hw -I$(COMMON_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
CXXFLAGS += -DXLEN_$(XLEN)
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
@ -33,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/processor.cpp
@ -46,7 +48,7 @@ endif
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += $(SRC_DIR)/verilator.vlt
VL_FLAGS += -DSIMULATION -DSV_DPI

View file

@ -40,10 +40,7 @@
#include <sstream>
#include <unordered_map>
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
#include <dram_sim.h>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
@ -53,8 +50,8 @@
#endif
#endif
#ifndef MEM_CYCLE_RATIO
#define MEM_CYCLE_RATIO -1
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
#endif
#ifndef TRACE_START_TIME
@ -109,7 +106,7 @@ void sim_trace_enable(bool enable) {
class Processor::Impl {
public:
Impl() {
Impl() : dram_sim_(MEM_CLOCK_RATIO) {
// force random values for uninitialized signals
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
@ -133,18 +130,7 @@ public:
ram_ = nullptr;
// initialize dram simulator
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(MEMORY_BANKS));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(1);
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
#ifndef NDEBUG
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
@ -155,7 +141,7 @@ public:
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
#endif
// reset the device
this->reset();
@ -172,12 +158,6 @@ public:
#endif
delete device_;
if (dram_) {
dram_->finish();
Stats::statlist.printall();
delete dram_;
}
}
void cout_flush() {
@ -237,6 +217,11 @@ private:
pending_mem_reqs_.clear();
{
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
}
mem_rd_rsp_active_ = false;
mem_wr_rsp_active_ = false;
@ -280,18 +265,20 @@ private:
#endif
this->dcr_bus_eval(1);
if (MEM_CYCLE_RATIO > 0) {
auto cycle = timestamp / 2;
if ((cycle % MEM_CYCLE_RATIO) == 0)
dram_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
dram_->tick();
}
dram_sim_.tick();
if (!dram_queue_.empty()) {
if (dram_->send(dram_queue_.front()))
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queue_.pop();
}
}
#ifndef NDEBUG
@ -337,7 +324,7 @@ private:
// process memory responses
if (mem_rd_rsp_active_
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
@ -347,7 +334,7 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
printf("%0ld: [sim] MEM Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
}
@ -368,7 +355,7 @@ private:
// send memory write response
if (mem_wr_rsp_active_
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
mem_wr_rsp_active_ = false;
}
if (!mem_wr_rsp_active_) {
@ -378,7 +365,7 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr);
*/
device_->m_axi_bvalid[0] = 1;
device_->m_axi_bid[0] = mem_rsp->tag;
@ -433,16 +420,11 @@ private:
mem_req->tag = device_->m_axi_awid[0];
mem_req->addr = device_->m_axi_awaddr[0];
mem_req->write = true;
mem_req->ready = true;
mem_req->ready = false;
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
ramulator::Request dram_req(
device_->m_axi_awaddr[0],
ramulator::Request::Type::WRITE,
0
);
dram_queue_.push(dram_req);
dram_queue_.push(mem_req);
}
} else {
// process reads
@ -455,15 +437,7 @@ private:
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
ramulator::Request dram_req(
device_->m_axi_araddr[0],
ramulator::Request::Type::READ,
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
mem_req->ready = true;
}, placeholders::_1, mem_req),
0
);
dram_queue_.push(dram_req);
dram_queue_.push(mem_req);
}
}
@ -502,7 +476,7 @@ private:
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd: bank=%d, tag=%0lx, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->tag, mem_rsp->addr);
printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
}
@ -554,13 +528,14 @@ private:
}
}
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag;
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
// send dram request
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::WRITE,
0
);
dram_queue_.push(dram_req);
dram_queue_.push(mem_req);
}
} else {
// process reads
@ -575,15 +550,7 @@ private:
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request
ramulator::Request dram_req(
byte_addr,
ramulator::Request::Type::READ,
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
mem_req->ready = true;
}, placeholders::_1, mem_req),
0
);
dram_queue_.push(dram_req);
dram_queue_.push(mem_req);
}
}
@ -614,11 +581,12 @@ private:
private:
typedef struct {
bool ready;
VVortex *device;
std::array<uint8_t, MEM_BLOCK_SIZE> block;
uint64_t addr;
uint64_t tag;
bool write;
bool ready;
} mem_req_t;
#ifdef AXI_BUS
@ -642,9 +610,9 @@ private:
RAM *ram_;
ramulator::Gem5Wrapper* dram_;
DramSim dram_sim_;
std::queue<ramulator::Request> dram_queue_;
std::queue<mem_req_t*> dram_queue_;
bool running_;
};

View file

@ -4,18 +4,20 @@ DESTDIR ?= $(CURDIR)
SRC_DIR = $(VORTEX_HOME)/sim/simx
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(SRC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
CXXFLAGS += -DXLEN_$(XLEN)
CXXFLAGS += $(CONFIGS)
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
# Debugging

View file

@ -17,8 +17,8 @@
#define RAM_PAGE_SIZE 4096
#endif
#ifndef MEM_CYCLE_RATIO
#define MEM_CYCLE_RATIO -1
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
#endif
#ifndef MEMORY_BANKS

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,14 +15,7 @@
#include <vector>
#include <queue>
#include <stdlib.h>
DISABLE_WARNING_PUSH
DISABLE_WARNING_UNUSED_PARAMETER
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
DISABLE_WARNING_POP
#include <dram_sim.h>
#include "constants.h"
#include "types.h"
@ -34,80 +27,68 @@ class MemSim::Impl {
private:
MemSim* simobject_;
Config config_;
DramSim dram_sim_;
PerfStats perf_stats_;
ramulator::Gem5Wrapper* dram_;
struct DramCallbackArgs {
MemSim* simobject;
MemReq request;
};
public:
Impl(MemSim* simobject, const Config& config)
Impl(MemSim* simobject, const Config& config)
: simobject_(simobject)
, config_(config)
{
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(config.channels));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(config.num_cores);
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
}
, dram_sim_(MEM_CLOCK_RATIO)
{}
~Impl() {
dram_->finish();
Stats::statlist.printall();
delete dram_;
//--
}
const PerfStats& perf_stats() const {
return perf_stats_;
}
void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
if (req.type == ramulator::Request::Type::WRITE)
return;
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
simobject_->MemRspPort.push(mem_rsp, 1);
DT(3, simobject_->name() << "-" << mem_rsp);
}
void reset() {
perf_stats_ = PerfStats();
dram_sim_.reset();
}
void tick() {
if (MEM_CYCLE_RATIO > 0) {
auto cycle = SimPlatform::instance().cycles();
if ((cycle % MEM_CYCLE_RATIO) == 0)
dram_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
dram_->tick();
}
dram_sim_.tick();
if (simobject_->MemReqPort.empty())
return;
auto& mem_req = simobject_->MemReqPort.front();
ramulator::Request dram_req(
// try to enqueue the request to the memory system
auto enqueue_success = dram_sim_.send_request(
mem_req.write,
mem_req.addr,
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
mem_req.cid
mem_req.cid,
[](void* arg) {
  // `arg` is the DramCallbackArgs allocated at the send_request() call site;
  // this callback is where it gets released.
  auto dram_args = reinterpret_cast<const DramCallbackArgs*>(arg);
  // Only reads produce a response; write completions are not forwarded.
  if (!dram_args->request.write) {
    MemRsp mem_rsp{dram_args->request.tag, dram_args->request.cid, dram_args->request.uuid};
    dram_args->simobject->MemRspPort.push(mem_rsp, 1);
    DT(3, dram_args->simobject->name() << "-" << mem_rsp);
  }
  // Free the arguments on both paths; the earlier early-return for writes
  // skipped this and leaked one allocation per completed write.
  delete dram_args;
},
new DramCallbackArgs{simobject_, mem_req}
);
if (!dram_->send(dram_req))
// check if the request was enqueued successfully
if (!enqueue_success)
return;
if (mem_req.write) {
++perf_stats_.writes;
} else {
++perf_stats_.reads;
}
DT(3, simobject_->name() << "-" << mem_req);
simobject_->MemReqPort.pop();
@ -116,9 +97,9 @@ public:
///////////////////////////////////////////////////////////////////////////////
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPort(this)
, MemReqPort(this)
, MemRspPort(this)
, impl_(new Impl(this, config))
{}

View file

@ -70,6 +70,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
--perf_mem_pending_reads_;
});
#ifndef NDEBUG
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << arch.num_threads()
@ -80,7 +81,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
<< ", local_mem_base=0x" << std::hex << arch.local_mem_base() << std::dec
<< ", num_barriers=" << arch.num_barriers()
<< std::endl;
#endif
// reset the device
this->reset();
}

View file

@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR)
SRC_DIR := $(VORTEX_HOME)/sim/xrtsim
AFU_DIR := $(RTL_DIR)/afu/xrt
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
CXXFLAGS += -DXLEN_$(XLEN)
LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
@ -47,7 +49,7 @@ endif
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp
@ -64,7 +66,7 @@ RTL_INCLUDE += -I$(AFU_DIR)
TOP = vortex_afu_shim
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += -DXLEN_$(XLEN)

View file

@ -26,10 +26,7 @@
#include <iomanip>
#include <mem.h>
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
#include <dram_sim.h>
#include <VX_config.h>
#include <future>
@ -46,8 +43,8 @@
#endif
#endif
#ifndef MEM_CYCLE_RATIO
#define MEM_CYCLE_RATIO -1
#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
#endif
#undef MEM_BLOCK_SIZE
@ -101,7 +98,7 @@ public:
Impl()
: device_(nullptr)
, ram_(nullptr)
, ramulator_(nullptr)
, dram_sim_(MEM_CLOCK_RATIO)
, stop_(false)
#ifdef VCD_OUTPUT
, trace_(nullptr)
@ -125,11 +122,6 @@ public:
if (ram_) {
delete ram_;
}
if (ramulator_) {
ramulator_->finish();
Stats::statlist.printall();
delete ramulator_;
}
}
int init() {
@ -152,18 +144,7 @@ public:
ram_ = new RAM(0, RAM_PAGE_SIZE);
// initialize dram simulator
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(MEMORY_BANKS));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(1);
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
#ifndef NDEBUG
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
@ -174,7 +155,7 @@ public:
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
#endif
// reset the device
this->reset();
@ -195,6 +176,15 @@ private:
this->axi_ctrl_bus_reset();
this->axi_mem_bus_reset();
for (auto& reqs : pending_mem_reqs_) {
reqs.clear();
}
{
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
}
device_->ap_rst_n = 0;
for (int i = 0; i < RESET_DELAY; ++i) {
@ -222,8 +212,17 @@ private:
this->axi_mem_bus_eval();
if (!dram_queue_.empty()) {
if (ramulator_->send(dram_queue_.front()))
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queue_.pop();
}
}
device_->ap_clk = 0;
@ -231,14 +230,7 @@ private:
device_->ap_clk = 1;
this->eval();
if (MEM_CYCLE_RATIO > 0) {
auto cycle = timestamp / 2;
if ((cycle % MEM_CYCLE_RATIO) == 0)
ramulator_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
ramulator_->tick();
}
dram_sim_.tick();
#ifndef NDEBUG
fflush(stdout);
@ -307,16 +299,25 @@ private:
//--
}
typedef struct {
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
bool write;
bool ready;
} mem_req_t;
Vvortex_afu_shim *device_;
RAM* ram_;
ramulator::Gem5Wrapper* ramulator_;
DramSim dram_sim_;
std::future<void> future_;
bool stop_;
std::mutex mutex_;
std::queue<ramulator::Request> dram_queue_;
std::list<mem_req_t*> pending_mem_reqs_[MEMORY_BANKS];
std::queue<mem_req_t*> dram_queue_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;

View file

@ -6,11 +6,10 @@ softfloat:
SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC
ramulator:
cd ramulator && git apply ../../miscs/patches/ramulator.patch 2> /dev/null; true
$(MAKE) -C ramulator libramulator.a
cd ramulator && mkdir -p build && cd build && cmake .. && make -j
clean:
$(MAKE) -C softfloat/build/Linux-x86_64-GCC clean
$(MAKE) -C ramulator clean
rm -rf ramulator/build
.PHONY: all fpnew softfloat ramulator

@ -1 +1 @@
Subproject commit 214f635845214adf030367939655d172ef0fed5f
Subproject commit e62c84a6f0e06566ba6e182d308434b4532068a5