mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
OpenCL benchmarks running
This commit is contained in:
parent
69139e47bb
commit
d2bd560593
10 changed files with 130 additions and 30 deletions
|
@ -31,6 +31,10 @@ unsigned vx_threadID(void);
|
|||
// Get hardware warp ID
|
||||
unsigned vx_warpID(void);
|
||||
|
||||
// Get the number of elapsed cycles / executed instructions
|
||||
unsigned vx_getCycles(void);
|
||||
unsigned vx_getInst(void);
|
||||
|
||||
void vx_resetStack(void);
|
||||
|
||||
|
||||
|
|
|
@ -49,6 +49,19 @@ vx_threadID:
|
|||
csrr a0, 0x20 # read thread IDs
|
||||
ret
|
||||
|
||||
.type vx_getCycles, @function
|
||||
.global vx_getCycles
|
||||
vx_getCycles:
|
||||
csrr a0, 0x26 # read cycle count
|
||||
ret
|
||||
|
||||
|
||||
.type vx_getInst, @function
|
||||
.global vx_getInst
|
||||
vx_getInst:
|
||||
csrr a0, 0x25 # read instruction count
|
||||
ret
|
||||
|
||||
|
||||
.type vx_resetStack, @function
|
||||
.global vx_resetStack
|
||||
|
|
|
@ -148,7 +148,7 @@ int _fstat(int file, struct stat * st)
|
|||
|
||||
int _isatty (int file)
|
||||
{
|
||||
vx_print_str("Hello from _isatty\n");
|
||||
// vx_print_str("Hello from _isatty\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -237,8 +237,8 @@ static int head_end = (int) 0x20000000;
|
|||
|
||||
void * _sbrk (int nbytes)
|
||||
{
|
||||
vx_print_str("Hello from _sbrk\n");
|
||||
vx_printf("nbytes: ", nbytes);
|
||||
// vx_print_str("Hello from _sbrk\n");
|
||||
// vx_printf("nbytes: ", nbytes);
|
||||
|
||||
//if (nbytes < 0) //vx_print_str("nbytes less than zero\n");
|
||||
// printf("nBytes: %d\n", nbytes);
|
||||
|
@ -248,7 +248,7 @@ void * _sbrk (int nbytes)
|
|||
nbytes = nbytes * -1;
|
||||
}
|
||||
|
||||
vx_printf("New nbytes: ", nbytes);
|
||||
// vx_printf("New nbytes: ", nbytes);
|
||||
|
||||
// if (nbytes > 10240)
|
||||
// {
|
||||
|
@ -260,9 +260,9 @@ void * _sbrk (int nbytes)
|
|||
{
|
||||
int base = heap_start;
|
||||
heap_start += nbytes;
|
||||
vx_print_str("_sbrk returning: ");
|
||||
vx_print_hex((unsigned) base);
|
||||
vx_print_str("\n");
|
||||
// vx_print_str("_sbrk returning: ");
|
||||
// vx_print_hex((unsigned) base);
|
||||
// vx_print_str("\n");
|
||||
return (void *) base;
|
||||
}
|
||||
else
|
||||
|
|
|
@ -20,7 +20,7 @@ _start:
|
|||
# Initialize SP
|
||||
# la sp, __stack_top
|
||||
la a1, vx_set_sp
|
||||
li a0, 4
|
||||
li a0, 32
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
jal vx_set_sp
|
||||
li a0, 1
|
||||
|
@ -46,7 +46,7 @@ _start:
|
|||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
li a0, 4
|
||||
li a0, 32
|
||||
.word 0x0005006b # tmc a0
|
||||
|
||||
.option push
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TOTAL_WARPS 2
|
||||
#define TOTAL_THREADS 16
|
||||
|
||||
func_t global_function_pointer;
|
||||
// void (func_t)(void *)
|
||||
|
@ -46,16 +48,39 @@ uint8_t * pocl_args;
|
|||
uint8_t * pocl_ctx;
|
||||
vx_pocl_workgroup_func pocl_pfn;
|
||||
|
||||
unsigned global_z;
|
||||
unsigned global_y;
|
||||
unsigned global_x;
|
||||
|
||||
|
||||
void pocl_spawn_real()
|
||||
{
|
||||
vx_tmc(pocl_threads);
|
||||
int x = vx_threadID();
|
||||
int y = vx_warpID();
|
||||
int base_x = vx_threadID();
|
||||
int base_y = vx_warpID();
|
||||
|
||||
(pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
|
||||
int local_x;
|
||||
int local_y;
|
||||
|
||||
if (y != 0)
|
||||
for (int iter_z = 0; iter_z < global_z; iter_z++)
|
||||
{
|
||||
for (int iter_x = 0; iter_x < global_x; iter_x++)
|
||||
{
|
||||
for (int iter_y = 0; iter_y < global_y; iter_y++)
|
||||
{
|
||||
|
||||
local_x = (iter_x * TOTAL_THREADS) + base_x;
|
||||
local_y = (iter_y * TOTAL_WARPS ) + base_y;
|
||||
|
||||
(pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
|
||||
|
||||
if (base_y != 0)
|
||||
{
|
||||
vx_tmc(0);
|
||||
}
|
||||
|
@ -66,24 +91,67 @@ void pocl_spawn_real()
|
|||
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments)
|
||||
{
|
||||
|
||||
if (ctx->num_groups[2] > 1)
|
||||
|
||||
// printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]);
|
||||
// printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]);
|
||||
// printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]);
|
||||
|
||||
// printf("\n\n");
|
||||
|
||||
// printf("ctx->local_size[0]: %d\n", ctx->local_size[0]);
|
||||
// printf("ctx->local_size[1]: %d\n", ctx->local_size[1]);
|
||||
// printf("ctx->local_size[2]: %d\n", ctx->local_size[2]);
|
||||
if (ctx->num_groups[0] > TOTAL_THREADS)
|
||||
{
|
||||
printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n");
|
||||
return;
|
||||
pocl_threads = TOTAL_THREADS;
|
||||
global_x = ctx->num_groups[0] / TOTAL_THREADS;
|
||||
printf("pocl_threads: %d\n", pocl_threads);
|
||||
// printf("global_x: %d\n", global_x);
|
||||
}
|
||||
else
|
||||
{
|
||||
pocl_threads = ctx->num_groups[0];
|
||||
global_x = 1;
|
||||
// printf("pocl_threads: %d\n", pocl_threads);
|
||||
// printf("global_x: %d\n", global_x);
|
||||
}
|
||||
|
||||
pocl_threads = ctx->num_groups[0];
|
||||
|
||||
global_z = ctx->num_groups[2];
|
||||
pocl_pfn = (vx_pocl_workgroup_func) pfn;
|
||||
pocl_ctx = (uint8_t *) ctx;
|
||||
pocl_args = (uint8_t *) arguments;
|
||||
|
||||
if (ctx->num_groups[1] > 1)
|
||||
{
|
||||
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
|
||||
if (ctx->num_groups[1] > TOTAL_WARPS)
|
||||
{
|
||||
global_y = ctx->num_groups[1] / TOTAL_WARPS;
|
||||
vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real);
|
||||
// printf("global_y: %d\n", global_y);
|
||||
// printf("Warps: %d\n", TOTAL_WARPS);
|
||||
}
|
||||
else
|
||||
{
|
||||
global_y = 1;
|
||||
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
|
||||
// printf("global_y: %d\n", global_y);
|
||||
// printf("Warps: %d\n", ctx->num_groups[1]);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned starting_cycles = vx_getCycles();
|
||||
unsigned starting_inst = vx_getInst();
|
||||
|
||||
pocl_spawn_real();
|
||||
|
||||
unsigned end_cycles = vx_getCycles();
|
||||
unsigned end_inst = vx_getInst();
|
||||
|
||||
|
||||
printf("pocl_spawn: Total Cycles: %d\n", (end_cycles - starting_cycles));
|
||||
printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst ));
|
||||
|
||||
// int z;
|
||||
// int y;
|
||||
// int x;
|
||||
|
|
|
@ -106,7 +106,7 @@ void Harp::reg_doWrite(Word cpuId, Word regNum) {
|
|||
#endif
|
||||
|
||||
Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
|
||||
a(a), iDec(d), mem(mem), steps(4)
|
||||
a(a), iDec(d), mem(mem), steps(4), num_cycles(0), num_instructions(0)
|
||||
{
|
||||
release_warp = false;
|
||||
foundSchedule = true;
|
||||
|
@ -162,12 +162,13 @@ bool Core::interrupt(Word r0) {
|
|||
|
||||
void Core::step()
|
||||
{
|
||||
cout << "\n\n\n------------------------------------------------------\n";
|
||||
D(3, "\n\n\n------------------------------------------------------");
|
||||
|
||||
D(3, "Started core::step" << flush);
|
||||
|
||||
steps++;
|
||||
D(3, "CYCLE: " << steps);
|
||||
this->num_cycles++;
|
||||
D(3, "CYCLE: " << this->num_cycles);
|
||||
|
||||
D(3, "Stalled Warps:");
|
||||
for (int widd = 0; widd < a.getNWarps(); widd++)
|
||||
|
@ -407,10 +408,12 @@ void Core::fetch()
|
|||
if (foundSchedule)
|
||||
{
|
||||
D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']');
|
||||
this->num_instructions = this->num_instructions + w[schedule_w].activeThreads;
|
||||
// this->num_instructions++;
|
||||
w[schedule_w].step(&inst_in_fetch);
|
||||
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w << flush);
|
||||
|
||||
this->getCacheDelays(&inst_in_fetch);
|
||||
// this->getCacheDelays(&inst_in_fetch);
|
||||
D(3, "Got cache delays" << flush);
|
||||
if (inst_in_fetch.stall_warp)
|
||||
{
|
||||
|
@ -444,7 +447,10 @@ void Core::fetch()
|
|||
{
|
||||
D(3, " 0");
|
||||
}
|
||||
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ',';
|
||||
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1)
|
||||
{
|
||||
D(3, ',');
|
||||
}
|
||||
}
|
||||
D(3, "\nPrinted active threads" << flush);
|
||||
// #endif
|
||||
|
@ -600,7 +606,7 @@ void Core::execute_unit()
|
|||
}
|
||||
else
|
||||
{
|
||||
cout << "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY\n";
|
||||
D(3, "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY");
|
||||
inst_in_scheduler.stalled = true;
|
||||
// INIT_TRACE(inst_in_exe);
|
||||
do_nothing = true;
|
||||
|
@ -759,8 +765,9 @@ void Warp::step(trace_inst_t * trace_inst) {
|
|||
bool fetchMore;
|
||||
|
||||
fetchMore = false;
|
||||
unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
|
||||
fetchBuffer.resize(fetchPos + fetchSize);
|
||||
// unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
|
||||
unsigned fetchSize = 4;
|
||||
fetchBuffer.resize(fetchSize);
|
||||
Word fetched = core->mem.fetch(pc + fetchPos, supervisorMode);
|
||||
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
|
||||
decPos = 0;
|
||||
|
|
|
@ -23,8 +23,8 @@ namespace Harp {
|
|||
encChar = 'w';
|
||||
nRegs = 32;
|
||||
nPRegs = 0;
|
||||
nThds = 8;
|
||||
nWarps = 8;
|
||||
nThds = 32;
|
||||
nWarps = 32;
|
||||
|
||||
extent = EXT_WARPS;
|
||||
|
||||
|
|
|
@ -144,6 +144,8 @@ namespace Harp {
|
|||
Word interruptEntry;
|
||||
|
||||
unsigned long steps;
|
||||
unsigned long num_cycles;
|
||||
unsigned long num_instructions;
|
||||
std::vector<Warp> w;
|
||||
std::map<Word, std::set<Warp *> > b; // Barriers
|
||||
int schedule_w;
|
||||
|
|
|
@ -849,6 +849,12 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
|
|||
{
|
||||
reg[rdest] = c.id;
|
||||
D(2, "CSR Reading wid " << hex << immsrc << dec << " and returning " << reg[rdest]);
|
||||
} else if (immsrc == 0x25)
|
||||
{
|
||||
reg[rdest] = c.core->num_instructions;
|
||||
} else if (immsrc == 0x26)
|
||||
{
|
||||
reg[rdest] = c.core->num_cycles;
|
||||
}
|
||||
// switch (func3)
|
||||
// {
|
||||
|
@ -2225,7 +2231,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
|
|||
}
|
||||
|
||||
// break;
|
||||
cout << "outside case" << endl << flush;
|
||||
// cout << "outside case" << endl << flush;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ Byte Harp::readByte(const vector<Byte> &b, Size &n) {
|
|||
}
|
||||
|
||||
Word_u Harp::readWord(const vector<Byte> &b, Size &n, Size wordSize) {
|
||||
if (b.size() - n < wordSize) throw OutOfBytes();
|
||||
// if (b.size() - n < wordSize) throw OutOfBytes();
|
||||
Word_u w(0);
|
||||
n += wordSize;
|
||||
// std::cout << "wordSize: " << wordSize << "\n";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue