mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
59232642c4
7 changed files with 49 additions and 25 deletions
|
@ -43,15 +43,21 @@ echo "begin clustering tests..."
|
|||
# warp/threads configurations
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
|
||||
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
|
||||
|
||||
# cores clustering
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
|
||||
|
||||
# L2/L3
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
|
||||
|
||||
echo "clustering tests done!"
|
||||
}
|
||||
|
@ -101,12 +107,14 @@ CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsi
|
|||
# test cache banking
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=io_addr
|
||||
|
||||
# test cache multi-porting
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
|
||||
CONFIGS="-DL2_NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
|
||||
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
|
||||
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=io_addr
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
|
|
@ -9,11 +9,10 @@
|
|||
#include <vortex.h>
|
||||
#include <vx_utils.h>
|
||||
#include <processor.h>
|
||||
#include <constants.h>
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -58,7 +57,7 @@ private:
|
|||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: arch_("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS)
|
||||
: arch_("rv32i", NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS)
|
||||
, ram_(RAM_PAGE_SIZE)
|
||||
, mem_allocation_(ALLOC_BASE_ADDR)
|
||||
{}
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
namespace vortex {
|
||||
|
||||
enum Constants {
|
||||
|
|
|
@ -21,6 +21,7 @@ Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
|
|||
, arch_(arch)
|
||||
, decoder_(arch)
|
||||
, mmu_(0, arch.wsize(), true)
|
||||
, smem_(RAM_PAGE_SIZE)
|
||||
, tex_units_(NUM_TEX_UNITS, this)
|
||||
, warps_(arch.num_warps())
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
|
@ -380,7 +381,12 @@ Word Core::icache_read(Addr addr, Size size) {
|
|||
|
||||
Word Core::dcache_read(Addr addr, Size size) {
|
||||
Word data;
|
||||
mmu_.read(&data, addr, size, 0);
|
||||
auto type = get_addr_type(addr, size);
|
||||
if (type == AddrType::Shared) {
|
||||
smem_.read(&data, addr & (SMEM_SIZE-1), size);
|
||||
} else {
|
||||
mmu_.read(&data, addr, size, 0);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
|
@ -389,7 +395,12 @@ void Core::dcache_write(Addr addr, Word data, Size size) {
|
|||
&& addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
this->writeToStdOut(addr, data);
|
||||
} else {
|
||||
mmu_.write(&data, addr, size, 0);
|
||||
auto type = get_addr_type(addr, size);
|
||||
if (type == AddrType::Shared) {
|
||||
smem_.write(&data, addr & (SMEM_SIZE-1), size);
|
||||
} else {
|
||||
mmu_.write(&data, addr, size, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -137,6 +137,7 @@ private:
|
|||
const ArchDef arch_;
|
||||
const Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
RAM smem_;
|
||||
std::vector<TexUnit> tex_units_;
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
|
|
|
@ -6,11 +6,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include "processor.h"
|
||||
#include "constants.h"
|
||||
#include <util.h>
|
||||
#include "args.h"
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
|
|
@ -18,8 +18,9 @@ Processor::Processor(const ArchDef& arch)
|
|||
|
||||
// connect memory sub-system
|
||||
memsim_ = MemSim::Create(1, MEM_LATENCY);
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1);
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1);
|
||||
std::vector<SimPort<MemRsp>*> mem_rsp_ports(1);
|
||||
|
||||
mem_req_ports.at(0) = &memsim_->MemReqPorts.at(0);
|
||||
mem_rsp_ports.at(0) = &memsim_->MemRspPorts.at(0);
|
||||
|
||||
|
@ -46,6 +47,7 @@ Processor::Processor(const ArchDef& arch)
|
|||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
|
@ -57,13 +59,17 @@ Processor::Processor(const ArchDef& arch)
|
|||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
|
||||
std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);
|
||||
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", Cache::Config{
|
||||
|
@ -74,40 +80,38 @@ Processor::Processor(const ArchDef& arch)
|
|||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
NUM_CORES, // request size
|
||||
(uint8_t)cores_per_cluster, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
|
||||
mem_req_ports.resize(cores_per_cluster);
|
||||
mem_rsp_ports.resize(cores_per_cluster);
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
}
|
||||
} else if (cores_per_cluster > 1) {
|
||||
} else {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, NUM_CORES);
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
|
||||
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
|
||||
mem_req_ports.resize(cores_per_cluster);
|
||||
mem_rsp_ports.resize(cores_per_cluster);
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * NUM_CLUSTERS) + j);
|
||||
mem_rsp_ports.at(i)->bind(&core->MemRspPort);
|
||||
core->MemReqPort.bind(mem_req_ports.at(j));
|
||||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue