mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor update
This commit is contained in:
parent
d99aaf3933
commit
9b79d60507
5 changed files with 23 additions and 16 deletions
|
@ -36,7 +36,7 @@ def monitor(stop_event):
|
|||
sys.stdout.flush()
|
||||
elapsed_time = 0
|
||||
|
||||
def execute_verbose(command):
|
||||
def execute(command):
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
while True:
|
||||
output = process.stdout.readline()
|
||||
|
@ -49,15 +49,9 @@ def execute_verbose(command):
|
|||
process.stdout.flush()
|
||||
ret = process.poll()
|
||||
if ret is not None:
|
||||
print(" + exitcode="+str(ret))
|
||||
return ret
|
||||
return -1
|
||||
|
||||
def execute(command):
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
ret = process.wait()
|
||||
return ret
|
||||
|
||||
def main(argv):
|
||||
if not argv:
|
||||
print("Usage: travis_run.py <command>")
|
||||
|
|
|
@ -82,8 +82,8 @@
|
|||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZEQ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZNE"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -88,6 +88,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
|
||||
int32_t thread_last = num_threads - 1;
|
||||
for (; thread_last >= 0; --thread_last) {
|
||||
if (warp.tmask.test(thread_last))
|
||||
break;
|
||||
}
|
||||
|
||||
std::vector<reg_data_t[3]> rsdata(num_threads);
|
||||
std::vector<reg_data_t> rddata(num_threads);
|
||||
|
||||
|
@ -661,7 +667,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
continue;
|
||||
rddata[t].i = next_pc;
|
||||
}
|
||||
next_pc = rsdata[thread_start][0].i + immsrc;
|
||||
next_pc = rsdata[thread_last][0].i + immsrc;
|
||||
trace->fetch_stall = true;
|
||||
rd_write = true;
|
||||
break;
|
||||
|
@ -1306,7 +1312,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->fetch_stall = true;
|
||||
next_tmask.reset();
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
next_tmask.set(t, rsdata.at(thread_start)[0].i & (1 << t));
|
||||
next_tmask.set(t, rsdata.at(thread_last)[0].i & (1 << t));
|
||||
}
|
||||
} break;
|
||||
case 1: {
|
||||
|
@ -1316,7 +1322,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
trace->fetch_stall = true;
|
||||
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_start)[0].i, rsdata.at(thread_start)[1].i);
|
||||
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_last)[0].i, rsdata.at(thread_last)[1].i);
|
||||
} break;
|
||||
case 2: {
|
||||
// SPLIT
|
||||
|
@ -1366,7 +1372,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
||||
auto stack_ptr = warp.ireg_file.at(thread_start).at(rsrc0);
|
||||
auto stack_ptr = warp.ireg_file.at(thread_last).at(rsrc0);
|
||||
if (stack_ptr != warp.ipdom_stack.size()) {
|
||||
if (warp.ipdom_stack.empty()) {
|
||||
std::cout << "IPDOM stack is empty!\n" << std::flush;
|
||||
|
@ -1386,7 +1392,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
trace->fetch_stall = true;
|
||||
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_start][0].i, rsdata[thread_start][1].i);
|
||||
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_last][0].i, rsdata[thread_last][1].i);
|
||||
} break;
|
||||
case 5: {
|
||||
// PRED
|
||||
|
@ -1404,7 +1410,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
if (pred.any()) {
|
||||
next_tmask &= pred;
|
||||
} else {
|
||||
next_tmask = warp.ireg_file.at(thread_start).at(rsrc1);
|
||||
next_tmask = warp.ireg_file.at(thread_last).at(rsrc1);
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
|
|
|
@ -14,6 +14,7 @@ all:
|
|||
$(MAKE) -C vecaddx
|
||||
$(MAKE) -C sgemmx
|
||||
$(MAKE) -C conv3x
|
||||
$(MAKE) -C sgemm2x
|
||||
|
||||
run-simx:
|
||||
$(MAKE) -C basic run-simx
|
||||
|
@ -28,6 +29,7 @@ run-simx:
|
|||
$(MAKE) -C vecaddx run-simx
|
||||
$(MAKE) -C sgemmx run-simx
|
||||
$(MAKE) -C conv3x run-simx
|
||||
$(MAKE) -C sgemm2x run-simx
|
||||
|
||||
run-rtlsim:
|
||||
$(MAKE) -C basic run-rtlsim
|
||||
|
@ -42,6 +44,7 @@ run-rtlsim:
|
|||
$(MAKE) -C vecaddx run-rtlsim
|
||||
$(MAKE) -C sgemmx run-rtlsim
|
||||
$(MAKE) -C conv3x run-rtlsim
|
||||
$(MAKE) -C sgemm2x run-rtlsim
|
||||
|
||||
run-opae:
|
||||
$(MAKE) -C basic run-opae
|
||||
|
@ -56,6 +59,7 @@ run-opae:
|
|||
$(MAKE) -C vecaddx run-opae
|
||||
$(MAKE) -C sgemmx run-opae
|
||||
$(MAKE) -C conv3x run-opae
|
||||
$(MAKE) -C sgemm2x run-opae
|
||||
|
||||
clean:
|
||||
$(MAKE) -C basic clean
|
||||
|
@ -70,6 +74,7 @@ clean:
|
|||
$(MAKE) -C vecaddx clean
|
||||
$(MAKE) -C sgemmx clean
|
||||
$(MAKE) -C conv3x clean
|
||||
$(MAKE) -C sgemm2x clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C basic clean-all
|
||||
|
@ -84,3 +89,4 @@ clean-all:
|
|||
$(MAKE) -C vecaddx clean-all
|
||||
$(MAKE) -C sgemmx clean-all
|
||||
$(MAKE) -C conv3x clean-all
|
||||
$(MAKE) -C sgemm2x clean-all
|
||||
|
|
|
@ -159,7 +159,7 @@ int main(int argc, char *argv[]) {
|
|||
uint32_t buf_size = size_sq * sizeof(TYPE);
|
||||
|
||||
uint32_t group_size = tile_size * tile_size;
|
||||
uint32_t num_groups = (size * size) / group_size;
|
||||
uint32_t num_groups = size_sq / group_size;
|
||||
uint32_t local_mem = 2 * group_size * sizeof(TYPE);
|
||||
|
||||
std::cout << "data type: " << Comparator<TYPE>::type_str() << std::endl;
|
||||
|
@ -177,6 +177,7 @@ int main(int argc, char *argv[]) {
|
|||
// check work group occupancy
|
||||
uint32_t max_barriers, max_localmem;
|
||||
RT_CHECK(vx_check_occupancy(device, group_size, &max_barriers, &max_localmem));
|
||||
std::cout << "occupancy: max_barriers=" << max_barriers << ", max_localmem=" << max_localmem << " bytes" << std::endl;
|
||||
RT_CHECK(max_barriers < 2);
|
||||
RT_CHECK(max_localmem < local_mem);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue