minor update

This commit is contained in:
Blaise Tine 2024-05-21 05:39:35 -07:00
parent d99aaf3933
commit 9b79d60507
5 changed files with 23 additions and 16 deletions

View file

@ -36,7 +36,7 @@ def monitor(stop_event):
sys.stdout.flush()
elapsed_time = 0
def execute_verbose(command):
def execute(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
while True:
output = process.stdout.readline()
@ -49,15 +49,9 @@ def execute_verbose(command):
process.stdout.flush()
ret = process.poll()
if ret is not None:
print(" + exitcode="+str(ret))
return ret
return -1
def execute(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
ret = process.wait()
return ret
def main(argv):
if not argv:
print("Usage: travis_run.py <command>")

View file

@ -82,8 +82,8 @@
`INST_ALU_XOR: `TRACE(level, ("XOR"));
`INST_ALU_OR: `TRACE(level, ("OR"));
`INST_ALU_AND: `TRACE(level, ("AND"));
`INST_ALU_CZEQ: `TRACE(level, ("CZEQ"));
`INST_ALU_CZNE: `TRACE(level, ("CZNE"));
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
default: `TRACE(level, ("?"));
endcase
end

View file

@ -88,6 +88,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
break;
}
int32_t thread_last = num_threads - 1;
for (; thread_last >= 0; --thread_last) {
if (warp.tmask.test(thread_last))
break;
}
std::vector<reg_data_t[3]> rsdata(num_threads);
std::vector<reg_data_t> rddata(num_threads);
@ -661,7 +667,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
continue;
rddata[t].i = next_pc;
}
next_pc = rsdata[thread_start][0].i + immsrc;
next_pc = rsdata[thread_last][0].i + immsrc;
trace->fetch_stall = true;
rd_write = true;
break;
@ -1306,7 +1312,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
trace->fetch_stall = true;
next_tmask.reset();
for (uint32_t t = 0; t < num_threads; ++t) {
next_tmask.set(t, rsdata.at(thread_start)[0].i & (1 << t));
next_tmask.set(t, rsdata.at(thread_last)[0].i & (1 << t));
}
} break;
case 1: {
@ -1316,7 +1322,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
trace->fetch_stall = true;
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_start)[0].i, rsdata.at(thread_start)[1].i);
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_last)[0].i, rsdata.at(thread_last)[1].i);
} break;
case 2: {
// SPLIT
@ -1366,7 +1372,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
trace->used_iregs.set(rsrc0);
trace->fetch_stall = true;
auto stack_ptr = warp.ireg_file.at(thread_start).at(rsrc0);
auto stack_ptr = warp.ireg_file.at(thread_last).at(rsrc0);
if (stack_ptr != warp.ipdom_stack.size()) {
if (warp.ipdom_stack.empty()) {
std::cout << "IPDOM stack is empty!\n" << std::flush;
@ -1386,7 +1392,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
trace->fetch_stall = true;
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_start][0].i, rsdata[thread_start][1].i);
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_last][0].i, rsdata[thread_last][1].i);
} break;
case 5: {
// PRED
@ -1404,7 +1410,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
if (pred.any()) {
next_tmask &= pred;
} else {
next_tmask = warp.ireg_file.at(thread_start).at(rsrc1);
next_tmask = warp.ireg_file.at(thread_last).at(rsrc1);
}
} break;
default:

View file

@ -14,6 +14,7 @@ all:
$(MAKE) -C vecaddx
$(MAKE) -C sgemmx
$(MAKE) -C conv3x
$(MAKE) -C sgemm2x
run-simx:
$(MAKE) -C basic run-simx
@ -28,6 +29,7 @@ run-simx:
$(MAKE) -C vecaddx run-simx
$(MAKE) -C sgemmx run-simx
$(MAKE) -C conv3x run-simx
$(MAKE) -C sgemm2x run-simx
run-rtlsim:
$(MAKE) -C basic run-rtlsim
@ -42,6 +44,7 @@ run-rtlsim:
$(MAKE) -C vecaddx run-rtlsim
$(MAKE) -C sgemmx run-rtlsim
$(MAKE) -C conv3x run-rtlsim
$(MAKE) -C sgemm2x run-rtlsim
run-opae:
$(MAKE) -C basic run-opae
@ -56,6 +59,7 @@ run-opae:
$(MAKE) -C vecaddx run-opae
$(MAKE) -C sgemmx run-opae
$(MAKE) -C conv3x run-opae
$(MAKE) -C sgemm2x run-opae
clean:
$(MAKE) -C basic clean
@ -70,6 +74,7 @@ clean:
$(MAKE) -C vecaddx clean
$(MAKE) -C sgemmx clean
$(MAKE) -C conv3x clean
$(MAKE) -C sgemm2x clean
clean-all:
$(MAKE) -C basic clean-all
@ -84,3 +89,4 @@ clean-all:
$(MAKE) -C vecaddx clean-all
$(MAKE) -C sgemmx clean-all
$(MAKE) -C conv3x clean-all
$(MAKE) -C sgemm2x clean-all

View file

@ -159,7 +159,7 @@ int main(int argc, char *argv[]) {
uint32_t buf_size = size_sq * sizeof(TYPE);
uint32_t group_size = tile_size * tile_size;
uint32_t num_groups = (size * size) / group_size;
uint32_t num_groups = size_sq / group_size;
uint32_t local_mem = 2 * group_size * sizeof(TYPE);
std::cout << "data type: " << Comparator<TYPE>::type_str() << std::endl;
@ -177,6 +177,7 @@ int main(int argc, char *argv[]) {
// check work group occupancy
uint32_t max_barriers, max_localmem;
RT_CHECK(vx_check_occupancy(device, group_size, &max_barriers, &max_localmem));
std::cout << "occupancy: max_barriers=" << max_barriers << ", max_localmem=" << max_localmem << " bytes" << std::endl;
RT_CHECK(max_barriers < 2);
RT_CHECK(max_localmem < local_mem);