mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Moved tc_num, tc_size param to makefile args
This commit is contained in:
parent
0e3badf723
commit
a378aed67c
14 changed files with 70 additions and 42 deletions
|
@ -114,14 +114,6 @@ case $i in
|
|||
LOGFILE=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--tc_size=*)
|
||||
TC_SIZE=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--tc_num=*)
|
||||
TC_NUM=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
show_help
|
||||
exit 0
|
||||
|
@ -190,7 +182,7 @@ then
|
|||
fi
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
# CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS"
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
if [ $REBUILD -ne 0 ]
|
||||
|
|
|
@ -124,6 +124,7 @@ regression()
|
|||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
|
|
@ -197,6 +197,9 @@
|
|||
`define VX_CSR_LOCAL_MEM_BASE 12'hFC3
|
||||
|
||||
`define VX_MAT_MUL_SIZE 12'hFC4
|
||||
`define VX_TC_NUM 12'hFC5
|
||||
`define VX_TC_SIZE 12'hFC6
|
||||
|
||||
|
||||
|
||||
`endif // VX_TYPES_VH
|
||||
|
|
|
@ -32,7 +32,7 @@ using namespace vortex;
|
|||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES, TC_SIZE, TC_NUM)
|
||||
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
|
||||
, ram_(0, RAM_PAGE_SIZE)
|
||||
, processor_(arch_)
|
||||
, global_mem_(ALLOC_BASE_ADDR,
|
||||
|
@ -69,12 +69,12 @@ public:
|
|||
case VX_CAPS_NUM_CORES:
|
||||
_value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_TC_SIZE:
|
||||
_value = TC_SIZE;
|
||||
break;
|
||||
case VX_CAPS_TC_NUM:
|
||||
_value = TC_NUM;
|
||||
break;
|
||||
// case VX_CAPS_TC_SIZE:
|
||||
// _value = TC_SIZE;
|
||||
// break;
|
||||
// case VX_CAPS_TC_NUM:
|
||||
// _value = TC_NUM;
|
||||
// break;
|
||||
case VX_CAPS_CACHE_LINE_SIZE:
|
||||
_value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
|
|
|
@ -35,11 +35,9 @@ private:
|
|||
uint16_t num_barriers_;
|
||||
uint16_t ipdom_size_;
|
||||
uint64_t local_mem_base_;
|
||||
uint16_t tc_size_;
|
||||
uint16_t tc_num_;
|
||||
|
||||
public:
|
||||
Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint64_t tc_size, uint64_t tc_num)
|
||||
Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores)
|
||||
: num_threads_(num_threads)
|
||||
, num_warps_(num_warps)
|
||||
, num_cores_(num_cores)
|
||||
|
@ -51,8 +49,6 @@ public:
|
|||
, num_barriers_(NUM_BARRIERS)
|
||||
, ipdom_size_((num_threads-1) * 2)
|
||||
, local_mem_base_(LMEM_BASE_ADDR)
|
||||
, tc_size_ (tc_size)
|
||||
, tc_num_ (tc_num)
|
||||
{}
|
||||
|
||||
uint16_t vsize() const {
|
||||
|
@ -98,14 +94,6 @@ public:
|
|||
uint16_t socket_size() const {
|
||||
return socket_size_;
|
||||
}
|
||||
|
||||
uint16_t tc_size() const {
|
||||
return tc_size_;
|
||||
}
|
||||
|
||||
uint16_t tc_num() const {
|
||||
return tc_num_;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
|
|||
, core_(core)
|
||||
, warps_(arch.num_warps(), arch)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, scratchpad(std::vector<Word>(core->arch().tc_size() * core->arch().tc_size() * 32768)) //Fix this
|
||||
, scratchpad(std::vector<Word>(32 * 32 * 32768)) //Fix this : Max TC_SIZE = 32
|
||||
{
|
||||
this->clear();
|
||||
}
|
||||
|
@ -355,6 +355,11 @@ Word Emulator::get_tiles()
|
|||
return mat_size;
|
||||
}
|
||||
|
||||
// Returns the emulator's current `tc_size` member, i.e. the value most
// recently stored by a write to the VX_TC_SIZE CSR (see set_csr).
// NOTE(review): no initial value for `tc_size` is visible in this view —
// confirm it is initialized before the first CSR write.
Word Emulator::get_tc_size()
|
||||
{
|
||||
return tc_size;
|
||||
}
|
||||
|
||||
Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
auto core_perf = core_->perf_stats();
|
||||
switch (addr) {
|
||||
|
@ -387,6 +392,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base();
|
||||
case VX_CSR_MSCRATCH: return csr_mscratch_;
|
||||
case VX_MAT_MUL_SIZE: return mat_size;
|
||||
case VX_TC_NUM: return tc_num;
|
||||
case VX_TC_SIZE: return tc_size;
|
||||
|
||||
CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles);
|
||||
CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs);
|
||||
|
@ -500,6 +507,13 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
case VX_MAT_MUL_SIZE:
|
||||
mat_size = value;
|
||||
break;
|
||||
case VX_TC_NUM:
|
||||
tc_num = value;
|
||||
break;
|
||||
case VX_TC_SIZE:
|
||||
tc_size = value;
|
||||
break;
|
||||
|
||||
default: {
|
||||
std::cout << std::hex << "Error: invalid CSR write addr=0x" << addr << ", value=0x" << value << std::endl;
|
||||
std::abort();
|
||||
|
|
|
@ -55,6 +55,7 @@ public:
|
|||
int get_exitcode() const;
|
||||
|
||||
Word get_tiles();
|
||||
Word get_tc_size();
|
||||
|
||||
private:
|
||||
|
||||
|
@ -125,6 +126,8 @@ private:
|
|||
wspawn_t wspawn_;
|
||||
std::vector<Word> scratchpad;
|
||||
uint32_t mat_size;
|
||||
uint32_t tc_size;
|
||||
uint32_t tc_num;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -1419,8 +1419,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
{ //TODO - make it data-type flexible
|
||||
uint32_t mem_bytes = 1;
|
||||
DP(3, "mem_bytes=" << mem_bytes << std::endl);
|
||||
uint16_t tc_size = core_->arch().tc_size();
|
||||
uint32_t TC_per_warp = core_->arch().tc_num();
|
||||
uint16_t tc_size = this->get_csr(VX_TC_SIZE, 0, wid);
|
||||
uint32_t TC_per_warp = this->get_csr(VX_TC_NUM, 0, wid);
|
||||
|
||||
DP(3, "tc_size=" << tc_size << std::endl);
|
||||
DP(3, "TC_per_warp=" << TC_per_warp << std::endl);
|
||||
|
||||
//Number of loads - dependent on the thread config
|
||||
uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value
|
||||
|
|
|
@ -255,7 +255,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
|
||||
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
|
||||
: FuncUnit(ctx, core, "TCU")
|
||||
, tc_size (core_->arch().tc_size())
|
||||
// , tc_size (core_->arch().tc_size())
|
||||
{}
|
||||
|
||||
void TcuUnit::tick() {
|
||||
|
@ -267,6 +267,8 @@ void TcuUnit::tick() {
|
|||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
uint32_t n_tiles = core_->emulator_.get_tiles();
|
||||
uint32_t tc_size = core_->emulator_.get_tc_size();
|
||||
|
||||
switch (trace->tcu_type) {
|
||||
case TCUType::TCU_MUL:
|
||||
{ //mat size = n_tiles * tc_size
|
||||
|
|
|
@ -103,7 +103,7 @@ private:
|
|||
class TcuUnit : public FuncUnit {
|
||||
public:
|
||||
TcuUnit(const SimContext& ctx, Core*);
|
||||
uint64_t tc_size;
|
||||
// uint64_t tc_size;
|
||||
void tick();
|
||||
};
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
{
|
||||
// create processor configuration
|
||||
Arch arch(num_threads, num_warps, num_cores, tc_size, tc_num);
|
||||
Arch arch(num_threads, num_warps, num_cores);
|
||||
|
||||
// create memory module
|
||||
RAM ram(0, RAM_PAGE_SIZE);
|
||||
|
|
|
@ -9,6 +9,6 @@ SRCS := $(SRC_DIR)/main.cpp
|
|||
|
||||
VX_SRCS := $(SRC_DIR)/kernel.cpp
|
||||
|
||||
OPTS ?= -n128 -d1
|
||||
OPTS ?= -n512 -d1 -s4 -t4
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -13,7 +13,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) {
|
|||
unsigned c_addr = reinterpret_cast<unsigned>(dst_ptr);
|
||||
|
||||
uint32_t tc_size = arg->tc_size;
|
||||
int TC_per_warp = arg->TC_per_warp;
|
||||
uint32_t TC_per_warp = arg->TC_per_warp;
|
||||
unsigned num_threads = arg->num_threads;
|
||||
int num_warps = arg->num_warps;
|
||||
uint32_t matrix_size = arg->matrix_size;
|
||||
|
@ -104,6 +104,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) {
|
|||
unsigned b_addr_base = b_addr + offset*arg->data_size;
|
||||
unsigned c_addr_base = c_addr + offset_c*arg->data_size;
|
||||
csr_write(VX_MAT_MUL_SIZE,n_tiles);
|
||||
csr_write(VX_TC_NUM,TC_per_warp);
|
||||
csr_write(VX_TC_SIZE,tc_size);
|
||||
|
||||
mload (0, a_addr_base);
|
||||
mload (1, b_addr_base);
|
||||
//In case of multiple threads - sync load
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
|
||||
const char* kernel_file = "kernel.vxbin";
|
||||
uint32_t matrix_size = 0;
|
||||
uint32_t tc_num = 4;
|
||||
uint32_t TC_size = 8;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h A_buffer = nullptr;
|
||||
vx_buffer_h B_buffer = nullptr;
|
||||
|
@ -38,7 +41,7 @@ static void show_usage() {
|
|||
|
||||
static void parse_args(int argc, char **argv, uint32_t &data_size) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) {
|
||||
while ((c = getopt(argc, argv, "n:k:d:t:s:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
matrix_size = atoi(optarg);
|
||||
|
@ -48,7 +51,13 @@ static void parse_args(int argc, char **argv, uint32_t &data_size) {
|
|||
break;
|
||||
case 'd':
|
||||
data_size = atoi(optarg);
|
||||
break;
|
||||
break;
|
||||
case 't':
|
||||
tc_num = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
TC_size = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
|
@ -141,12 +150,22 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
uint64_t num_cores, num_warps, num_threads, tc_size, TC_per_warp;
|
||||
uint64_t num_cores, num_warps, num_threads;
|
||||
uint32_t tc_size, TC_per_warp;
|
||||
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp));
|
||||
|
||||
std::cout << "Debug :: tc_size (optarg) = " << TC_size << std::endl;
|
||||
std::cout << "Debug :: tc_num (optarg) = " << tc_num << std::endl;
|
||||
|
||||
//Add assert/knob
|
||||
tc_size = TC_size;
|
||||
TC_per_warp = tc_num;
|
||||
|
||||
// RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size));
|
||||
// RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp));
|
||||
|
||||
std::cout << "Debug :: tc_size = " << tc_size << std::endl;
|
||||
std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue