Intrinsics: tests for TMC+Control Divergence

This commit is contained in:
felsabbagh3 2019-11-01 21:53:37 -04:00
parent 2b9f6f3d0b
commit bbb2373919
14 changed files with 588 additions and 260 deletions

View file

@ -60,15 +60,15 @@ module VX_execute_unit (
endgenerate
wire [$clog2(`NT)-1:0] branch_use_index;
wire branch_found_valid;
wire [$clog2(`NT)-1:0] jal_branch_use_index;
wire jal_branch_found_valid;
VX_generic_priority_encoder #(.N(`NT)) choose_alu_result(
.valids(VX_exec_unit_req.valid),
.index (branch_use_index),
.found (branch_found_valid)
.index (jal_branch_use_index),
.found (jal_branch_found_valid)
);
wire[31:0] branch_use_alu_result = alu_result[branch_use_index];
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
reg temp_branch_dir;
always @(*)
@ -104,7 +104,7 @@ module VX_execute_unit (
// Jal rsp
assign VX_jal_rsp.jal = in_jal;
assign VX_jal_rsp.jal_dest = $signed(in_a_reg_data[0]) + $signed(in_jal_offset);
assign VX_jal_rsp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign VX_jal_rsp.jal_warp_num = VX_exec_unit_req.warp_num;
// Branch rsp

View file

@ -57,6 +57,7 @@ module VX_fetch (
// Split
.is_split (VX_warp_ctl.is_split),
.dont_split (VX_warp_ctl.dont_split),
.split_new_mask (VX_warp_ctl.split_new_mask),
.split_later_mask (VX_warp_ctl.split_later_mask),
.split_save_pc (VX_warp_ctl.split_save_pc),

View file

@ -71,7 +71,8 @@ module VX_gpgpu_inst (
// wire[`NW_M1:0] num_valids = $countones(curr_valids);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1) && (split_new_use_mask != 0) && (split_new_use_mask != {`NT{1'b1}});
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}}));
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;

View file

@ -29,6 +29,7 @@ module VX_warp_scheduler (
// Split
input wire is_split,
input wire dont_split,
input wire[`NT_M1:0] split_new_mask,
input wire[`NT_M1:0] split_later_mask,
input wire[31:0] split_save_pc,
@ -104,6 +105,8 @@ module VX_warp_scheduler (
reg[`NW-1:0] total_barrier_stall;
reg didnt_split;
/* verilator lint_off UNUSED */
// wire[$clog2(`NW):0] num_active;
/* verilator lint_on UNUSED */
@ -122,6 +125,7 @@ module VX_warp_scheduler (
visible_active[0] <= 1; // Activating first warp
thread_masks[0] <= 1; // Activating first thread in first warp
warp_stalled <= 0;
didnt_split <= 0;
// total_barrier_stall = 0;
for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin
warp_pcs[curr_w_help] <= 0;
@ -148,14 +152,20 @@ module VX_warp_scheduler (
end else if (ctm) begin
thread_masks[ctm_warp_num] <= ctm_mask;
warp_stalled[ctm_warp_num] <= 0;
end else if (is_join) begin
end else if (is_join && !didnt_split) begin
if (!join_fall) begin
warp_pcs[join_warp_num] <= join_pc;
end
thread_masks[join_warp_num] <= join_tm;
didnt_split <= 0;
end else if (is_split) begin
warp_stalled[split_warp_num] <= 0;
thread_masks[split_warp_num] <= split_new_mask;
if (!dont_split) begin
thread_masks[split_warp_num] <= split_new_mask;
didnt_split <= 0;
end else begin
didnt_split <= 1;
end
end
if (whalt) begin
@ -243,9 +253,9 @@ module VX_warp_scheduler (
wire correct_warp_s = (curr_warp == split_warp_num);
wire correct_warp_j = (curr_warp == join_warp_num);
wire push = is_split && correct_warp_s;
wire push = (is_split && !dont_split) && correct_warp_s;
wire pop = is_join && correct_warp_j;
VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT))) ipdom_stack(
VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack(
.clk (clk),
.reset(reset),
.push (push),

View file

@ -149,6 +149,23 @@ module VX_Cache_Bank
wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
wire[31:0] lw_data = (data_unQual);
wire[31:0] sw_data = writedata;
wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} :
b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} :
b3 ? {{ 0{1'b0}}, writedata[7:0], {24{1'b0}}} :
writedata;
wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata;
wire[31:0] use_write_data = sb ? sb_data :
sh ? sh_data :
sw_data;
wire[31:0] data_Qual = lb ? lb_data :
lh ? lh_data :
lhu ? lhu_data :
@ -177,7 +194,7 @@ module VX_Cache_Bank
// assign we[g] = (normal_write || (write_from_mem)) ? 1'b1 : 1'b0;
assign data_write[g] = write_from_mem ? fetched_writedata[g] : writedata;
assign data_write[g] = write_from_mem ? fetched_writedata[g] : use_write_data;
assign way_to_update = write_from_mem ? evicted_way : update_way;
end

View file

@ -23,6 +23,7 @@ interface VX_warp_ctl_inter ();
wire[$clog2(`NW):0] num_warps;
wire is_split;
wire dont_split;
wire[`NW_M1:0] split_warp_num;
wire[`NT_M1:0] split_new_mask;
wire[`NT_M1:0] split_later_mask;

View file

@ -202,7 +202,7 @@ void io_handler(bool clk, bool io_valid, unsigned io_data)
// Print the end-of-simulation banner and the two cycle counters to stderr.
//
// cycles: cycle count supplied by the caller; printed as "Verilog Cycle Num".
// `num_cycles` is not declared inside this function — presumably a file-level
// counter kept on the DPI side (TODO confirm).
//
// Despite the name, nothing here terminates the process; it only prints.
void gracefulExit(int cycles)
{
// NOTE(review): the asterisk separator is emitted twice in a row (the first
// copy with a leading newline) — confirm that both lines are intended.
fprintf(stderr, "\n*********************\n\n");
fprintf(stderr, "*********************\n\n");
// Both values are formatted with %d; assumes num_cycles is an int — TODO confirm.
fprintf(stderr, "DPI Cycle Num: %d\tVerilog Cycle Num: %d\n", num_cycles, cycles);
}

View file

@ -22,3 +22,13 @@ unsigned vx_threadID(void);
// Get hardware warp ID
unsigned vx_warpID(void);
// Structured control-divergence helpers built on vx_split()/vx_join().
//
// Usage:
//     __if (cond) { ... } __else { ... } __endif
//
// __if(b)  calls vx_split(b) and then opens an ordinary `if (b)`.
//          NOTE: `b` is evaluated twice, so it must be free of side effects,
//          and the expansion is two statements, so __if must not be used as
//          the unbraced body of another if/else/loop.
// __else   plain `else` belonging to the `if` opened by __if.
// __endif  calls vx_join(); the macro already supplies the trailing ';'.
//
// The __if definition deliberately ends on the `if (b)` line with no trailing
// backslash, so the directive that follows can never be spliced into it.
#define __if(b) vx_split(b); \
if (b)
#define __else else
#define __endif vx_join();

View file

@ -27,6 +27,7 @@ vx_barrier:
# vx_split: issue the custom "split" instruction on a0, then return.
# The instruction is emitted as a raw 32-bit word rather than a mnemonic.
# In:  a0 = operand consumed by the split instruction.
.global vx_split
vx_split:
.word 0x0005206b # split a0
# Return explicitly; without this, execution would run on into the code that
# follows this routine.
ret
.type vx_join, @function
.global vx_join

View file

@ -3,40 +3,64 @@
.type _start, @function
.global _start
# _start: bare-metal entry point.
#   1. Activates 4 threads with the custom tmc instruction (emitted as a raw
#      word; a0 carries the operand).
#   2. Gives every thread a private 1 KiB stack:
#          sp = 0x6ffff000 - 1024 * tid      (tid is read from CSR 0x20)
#   3. Calls main.
#   4. Issues tmc with a0 = 0 once main returns — presumably this deactivates
#      every thread and ends the run (TODO confirm against the hardware docs).
# Clobbers: a0, a1, sp, ra (plus whatever main clobbers).
_start:
    li      a0, 4
    .word   0x0005006b          # tmc a0 (a0 = 4)

    # The stack must be set up before anything that uses sp, i.e. before main.
    csrr    a1, 0x20            # a1 = tid
    slli    a1, a1, 10          # a1 = tid * 1024 (per-thread stack size)
    lui     sp, 0x6ffff         # sp = 0x6ffff000, top of the stack region
    sub     sp, sp, a1          # sp = stack top - 1024 * tid

    # NOTE(review): repeats the tmc 4 issued above; kept so the generated
    # image stays identical to the existing build — looks redundant, confirm.
    li      a0, 4
    .word   0x0005006b          # tmc a0 (a0 = 4)

    jal     main

    li      a0, 0
    .word   0x0005006b          # tmc a0 (a0 = 0)

View file

@ -8,140 +8,288 @@ Disassembly of section .text:
80000000: 00400513 li a0,4
80000004: 0005006b 0x5006b
80000008: 020025f3 csrr a1,0x20
8000000c: 00259593 slli a1,a1,0x2
80000010: 20000637 lui a2,0x20000
80000014: 00b60633 add a2,a2,a1
80000018: 00b62023 sw a1,0(a2) # 20000000 <_start-0x60000000>
8000001c: 40000637 lui a2,0x40000
80000020: 00b60633 add a2,a2,a1
80000024: 00500693 li a3,5
80000028: 00d62023 sw a3,0(a2) # 40000000 <_start-0x40000000>
8000002c: 80000637 lui a2,0x80000
80000030: 00b60633 add a2,a2,a1
80000034: 00700693 li a3,7
80000038: 00d62023 sw a3,0(a2) # 80000000 <arr+0xfefffefc>
8000003c: 60000637 lui a2,0x60000
80000040: 00b60633 add a2,a2,a1
80000044: 00700693 li a3,7
80000048: 00d62023 sw a3,0(a2) # 60000000 <_start-0x20000000>
8000004c: 20000637 lui a2,0x20000
80000050: 00b60633 add a2,a2,a1
80000054: 00062703 lw a4,0(a2) # 20000000 <_start-0x60000000>
80000058: 00000513 li a0,0
8000005c: 0005006b 0x5006b
8000000c: 00a59593 slli a1,a1,0xa
80000010: 6ffff137 lui sp,0x6ffff
80000014: 40b10133 sub sp,sp,a1
80000018: 00400513 li a0,4
8000001c: 0005006b 0x5006b
80000020: 380000ef jal ra,800003a0 <main>
80000024: 00000513 li a0,0
80000028: 0005006b 0x5006b
80000060 <vx_wsapwn>:
80000060: 00b5106b 0xb5106b
80000064: 00008067 ret
8000002c <vx_wsapwn>:
8000002c: 00b5106b 0xb5106b
80000030: 00008067 ret
80000068 <vx_tmc>:
80000068: 0005006b 0x5006b
8000006c: 00008067 ret
80000034 <vx_tmc>:
80000034: 0005006b 0x5006b
80000038: 00008067 ret
80000070 <vx_barrier>:
80000070: 00b5406b 0xb5406b
80000074: 00008067 ret
8000003c <vx_barrier>:
8000003c: 00b5406b 0xb5406b
80000040: 00008067 ret
80000078 <vx_split>:
80000078: 0005206b 0x5206b
80000044 <vx_split>:
80000044: 0005206b 0x5206b
80000048: 00008067 ret
8000007c <vx_join>:
8000007c: 0000306b 0x306b
80000080: 00008067 ret
8000004c <vx_join>:
8000004c: 0000306b 0x306b
80000050: 00008067 ret
80000084 <vx_warpID>:
80000084: 02102573 csrr a0,0x21
80000088: 00008067 ret
80000054 <vx_warpID>:
80000054: 02102573 csrr a0,0x21
80000058: 00008067 ret
8000008c <vx_threadID>:
8000008c: 02002573 csrr a0,0x20
8000005c <vx_threadID>:
8000005c: 02002573 csrr a0,0x20
80000060: 00008067 ret
80000064 <vx_print_str>:
80000064: ff410113 addi sp,sp,-12 # 6fffeff4 <_start-0x1000100c>
80000068: 00112023 sw ra,0(sp)
8000006c: 00b12223 sw a1,4(sp)
80000070 <bl>:
80000070: 00054583 lbu a1,0(a0)
80000074: 00058863 beqz a1,80000084 <be>
80000078: 01c000ef jal ra,80000094 <vx_printc>
8000007c: 00150513 addi a0,a0,1
80000080: ff1ff06f j 80000070 <bl>
80000084 <be>:
80000084: 00012083 lw ra,0(sp)
80000088: 00412583 lw a1,4(sp)
8000008c: 00c10113 addi sp,sp,12
80000090: 00008067 ret
80000094 <vx_print_str>:
80000094: ff410113 addi sp,sp,-12
80000098: 00112023 sw ra,0(sp)
8000009c: 00b12223 sw a1,4(sp)
80000094 <vx_printc>:
80000094: 000102b7 lui t0,0x10
80000098: 00b2a023 sw a1,0(t0) # 10000 <_start-0x7fff0000>
8000009c: 00008067 ret
800000a0 <bl>:
800000a0: 00054583 lbu a1,0(a0)
800000a4: 00058863 beqz a1,800000b4 <be>
800000a8: 01c000ef jal ra,800000c4 <vx_printc>
800000ac: 00150513 addi a0,a0,1
800000b0: ff1ff06f j 800000a0 <bl>
800000a0 <vx_print_hex>:
800000a0: fe010113 addi sp,sp,-32
800000a4: 00112e23 sw ra,28(sp)
800000a8: 00812c23 sw s0,24(sp)
800000ac: 02010413 addi s0,sp,32
800000b0: fea42623 sw a0,-20(s0)
800000b4: 810007b7 lui a5,0x81000
800000b8: fec42703 lw a4,-20(s0)
800000bc: 00271713 slli a4,a4,0x2
800000c0: 0ac78793 addi a5,a5,172 # 810000ac <arr+0xffffff80>
800000c4: 00f707b3 add a5,a4,a5
800000c8: 0007a783 lw a5,0(a5)
800000cc: 00078513 mv a0,a5
800000d0: f95ff0ef jal ra,80000064 <vx_print_str>
800000d4: 00000013 nop
800000d8: 01c12083 lw ra,28(sp)
800000dc: 01812403 lw s0,24(sp)
800000e0: 02010113 addi sp,sp,32
800000e4: 00008067 ret
800000b4 <be>:
800000b4: 00012083 lw ra,0(sp)
800000b8: 00412583 lw a1,4(sp)
800000bc: 00c10113 addi sp,sp,12
800000c0: 00008067 ret
800000e8 <vx_printf>:
800000e8: fe010113 addi sp,sp,-32
800000ec: 00112e23 sw ra,28(sp)
800000f0: 00812c23 sw s0,24(sp)
800000f4: 02010413 addi s0,sp,32
800000f8: fea42623 sw a0,-20(s0)
800000fc: feb42423 sw a1,-24(s0)
80000100: fec42503 lw a0,-20(s0)
80000104: f61ff0ef jal ra,80000064 <vx_print_str>
80000108: fe842503 lw a0,-24(s0)
8000010c: f95ff0ef jal ra,800000a0 <vx_print_hex>
80000110: 810007b7 lui a5,0x81000
80000114: 04078513 addi a0,a5,64 # 81000040 <arr+0xffffff14>
80000118: f4dff0ef jal ra,80000064 <vx_print_str>
8000011c: 00000013 nop
80000120: 01c12083 lw ra,28(sp)
80000124: 01812403 lw s0,24(sp)
80000128: 02010113 addi sp,sp,32
8000012c: 00008067 ret
800000c4 <vx_printc>:
800000c4: 000102b7 lui t0,0x10
800000c8: 00b2a023 sw a1,0(t0) # 10000 <_start-0x7fff0000>
800000cc: 00008067 ret
800000d0 <vx_print_hex>:
800000d0: fe010113 addi sp,sp,-32
800000d4: 00112e23 sw ra,28(sp)
800000d8: 00812c23 sw s0,24(sp)
800000dc: 02010413 addi s0,sp,32
800000e0: fea42623 sw a0,-20(s0)
800000e4: 810007b7 lui a5,0x81000
800000e8: fec42703 lw a4,-20(s0)
800000ec: 00271713 slli a4,a4,0x2
800000f0: 08478793 addi a5,a5,132 # 81000084 <arr+0xffffff80>
800000f4: 00f707b3 add a5,a4,a5
800000f8: 0007a783 lw a5,0(a5)
800000fc: 00078513 mv a0,a5
80000100: f95ff0ef jal ra,80000094 <vx_print_str>
80000104: 00000013 nop
80000108: 01c12083 lw ra,28(sp)
8000010c: 01812403 lw s0,24(sp)
80000110: 02010113 addi sp,sp,32
80000114: 00008067 ret
80000118 <vx_printf>:
80000118: fe010113 addi sp,sp,-32
8000011c: 00112e23 sw ra,28(sp)
80000120: 00812c23 sw s0,24(sp)
80000124: 02010413 addi s0,sp,32
80000128: fea42623 sw a0,-20(s0)
8000012c: feb42423 sw a1,-24(s0)
80000130: fec42503 lw a0,-20(s0)
80000134: f61ff0ef jal ra,80000094 <vx_print_str>
80000138: fe842503 lw a0,-24(s0)
8000013c: f95ff0ef jal ra,800000d0 <vx_print_hex>
80000130 <test_tmc>:
80000130: fe010113 addi sp,sp,-32
80000134: 00112e23 sw ra,28(sp)
80000138: 00812c23 sw s0,24(sp)
8000013c: 02010413 addi s0,sp,32
80000140: 810007b7 lui a5,0x81000
80000144: 04078513 addi a0,a5,64 # 81000040 <arr+0xffffff3c>
80000148: f4dff0ef jal ra,80000094 <vx_print_str>
8000014c: 00000013 nop
80000150: 01c12083 lw ra,28(sp)
80000154: 01812403 lw s0,24(sp)
80000158: 02010113 addi sp,sp,32
8000015c: 00008067 ret
80000144: 08478513 addi a0,a5,132 # 81000084 <arr+0xffffff58>
80000148: f1dff0ef jal ra,80000064 <vx_print_str>
8000014c: 00400513 li a0,4
80000150: ee5ff0ef jal ra,80000034 <vx_tmc>
80000154: f09ff0ef jal ra,8000005c <vx_threadID>
80000158: fea42623 sw a0,-20(s0)
8000015c: fec42703 lw a4,-20(s0)
80000160: 810007b7 lui a5,0x81000
80000164: fec42683 lw a3,-20(s0)
80000168: 00269693 slli a3,a3,0x2
8000016c: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000170: 00f687b3 add a5,a3,a5
80000174: 00e7a023 sw a4,0(a5)
80000178: 00100513 li a0,1
8000017c: eb9ff0ef jal ra,80000034 <vx_tmc>
80000180: 810007b7 lui a5,0x81000
80000184: 12c7a783 lw a5,300(a5) # 8100012c <arr+0x0>
80000188: 00078513 mv a0,a5
8000018c: f15ff0ef jal ra,800000a0 <vx_print_hex>
80000190: 810007b7 lui a5,0x81000
80000194: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
80000198: ecdff0ef jal ra,80000064 <vx_print_str>
8000019c: 810007b7 lui a5,0x81000
800001a0: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
800001a4: 0047a783 lw a5,4(a5)
800001a8: 00078513 mv a0,a5
800001ac: ef5ff0ef jal ra,800000a0 <vx_print_hex>
800001b0: 810007b7 lui a5,0x81000
800001b4: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
800001b8: eadff0ef jal ra,80000064 <vx_print_str>
800001bc: 810007b7 lui a5,0x81000
800001c0: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
800001c4: 0087a783 lw a5,8(a5)
800001c8: 00078513 mv a0,a5
800001cc: ed5ff0ef jal ra,800000a0 <vx_print_hex>
800001d0: 810007b7 lui a5,0x81000
800001d4: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
800001d8: e8dff0ef jal ra,80000064 <vx_print_str>
800001dc: 810007b7 lui a5,0x81000
800001e0: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
800001e4: 00c7a783 lw a5,12(a5)
800001e8: 00078513 mv a0,a5
800001ec: eb5ff0ef jal ra,800000a0 <vx_print_hex>
800001f0: 810007b7 lui a5,0x81000
800001f4: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
800001f8: e6dff0ef jal ra,80000064 <vx_print_str>
800001fc: 00000013 nop
80000200: 01c12083 lw ra,28(sp)
80000204: 01812403 lw s0,24(sp)
80000208: 02010113 addi sp,sp,32
8000020c: 00008067 ret
80000160 <main>:
80000160: fe010113 addi sp,sp,-32
80000164: 00112e23 sw ra,28(sp)
80000168: 00812c23 sw s0,24(sp)
8000016c: 02010413 addi s0,sp,32
80000170: 00400513 li a0,4
80000174: ef5ff0ef jal ra,80000068 <vx_tmc>
80000178: f15ff0ef jal ra,8000008c <vx_threadID>
8000017c: fea42623 sw a0,-20(s0)
80000180: fec42703 lw a4,-20(s0)
80000184: 810007b7 lui a5,0x81000
80000188: fec42683 lw a3,-20(s0)
8000018c: 00269693 slli a3,a3,0x2
80000190: 10478793 addi a5,a5,260 # 81000104 <arr+0x0>
80000194: 00f687b3 add a5,a3,a5
80000198: 00e7a023 sw a4,0(a5)
8000019c: 00000793 li a5,0
800001a0: 00078513 mv a0,a5
800001a4: 01c12083 lw ra,28(sp)
800001a8: 01812403 lw s0,24(sp)
800001ac: 02010113 addi sp,sp,32
800001b0: 00008067 ret
80000210 <test_divergence>:
80000210: fe010113 addi sp,sp,-32
80000214: 00112e23 sw ra,28(sp)
80000218: 00812c23 sw s0,24(sp)
8000021c: 02010413 addi s0,sp,32
80000220: e3dff0ef jal ra,8000005c <vx_threadID>
80000224: fea42623 sw a0,-20(s0)
80000228: fec42783 lw a5,-20(s0)
8000022c: 0027b793 sltiu a5,a5,2
80000230: fef405a3 sb a5,-21(s0)
80000234: feb44783 lbu a5,-21(s0)
80000238: 00078513 mv a0,a5
8000023c: e09ff0ef jal ra,80000044 <vx_split>
80000240: feb44783 lbu a5,-21(s0)
80000244: 06078463 beqz a5,800002ac <test_divergence+0x9c>
80000248: fec42783 lw a5,-20(s0)
8000024c: 0017b793 seqz a5,a5
80000250: fef40523 sb a5,-22(s0)
80000254: fea44783 lbu a5,-22(s0)
80000258: 00078513 mv a0,a5
8000025c: de9ff0ef jal ra,80000044 <vx_split>
80000260: fea44783 lbu a5,-22(s0)
80000264: 02078263 beqz a5,80000288 <test_divergence+0x78>
80000268: 810007b7 lui a5,0x81000
8000026c: fec42703 lw a4,-20(s0)
80000270: 00271713 slli a4,a4,0x2
80000274: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000278: 00f707b3 add a5,a4,a5
8000027c: 00a00713 li a4,10
80000280: 00e7a023 sw a4,0(a5)
80000284: 0200006f j 800002a4 <test_divergence+0x94>
80000288: 810007b7 lui a5,0x81000
8000028c: fec42703 lw a4,-20(s0)
80000290: 00271713 slli a4,a4,0x2
80000294: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000298: 00f707b3 add a5,a4,a5
8000029c: 00b00713 li a4,11
800002a0: 00e7a023 sw a4,0(a5)
800002a4: da9ff0ef jal ra,8000004c <vx_join>
800002a8: 0640006f j 8000030c <test_divergence+0xfc>
800002ac: fec42783 lw a5,-20(s0)
800002b0: 0037b793 sltiu a5,a5,3
800002b4: fef404a3 sb a5,-23(s0)
800002b8: fe944783 lbu a5,-23(s0)
800002bc: 00078513 mv a0,a5
800002c0: d85ff0ef jal ra,80000044 <vx_split>
800002c4: fe944783 lbu a5,-23(s0)
800002c8: 02078263 beqz a5,800002ec <test_divergence+0xdc>
800002cc: 810007b7 lui a5,0x81000
800002d0: fec42703 lw a4,-20(s0)
800002d4: 00271713 slli a4,a4,0x2
800002d8: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
800002dc: 00f707b3 add a5,a4,a5
800002e0: 00c00713 li a4,12
800002e4: 00e7a023 sw a4,0(a5)
800002e8: 0200006f j 80000308 <test_divergence+0xf8>
800002ec: 810007b7 lui a5,0x81000
800002f0: fec42703 lw a4,-20(s0)
800002f4: 00271713 slli a4,a4,0x2
800002f8: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
800002fc: 00f707b3 add a5,a4,a5
80000300: 00d00713 li a4,13
80000304: 00e7a023 sw a4,0(a5)
80000308: d45ff0ef jal ra,8000004c <vx_join>
8000030c: d41ff0ef jal ra,8000004c <vx_join>
80000310: 810007b7 lui a5,0x81000
80000314: 12c7a783 lw a5,300(a5) # 8100012c <arr+0x0>
80000318: 00078513 mv a0,a5
8000031c: d85ff0ef jal ra,800000a0 <vx_print_hex>
80000320: 810007b7 lui a5,0x81000
80000324: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
80000328: d3dff0ef jal ra,80000064 <vx_print_str>
8000032c: 810007b7 lui a5,0x81000
80000330: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000334: 0047a783 lw a5,4(a5)
80000338: 00078513 mv a0,a5
8000033c: d65ff0ef jal ra,800000a0 <vx_print_hex>
80000340: 810007b7 lui a5,0x81000
80000344: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
80000348: d1dff0ef jal ra,80000064 <vx_print_str>
8000034c: 810007b7 lui a5,0x81000
80000350: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000354: 0087a783 lw a5,8(a5)
80000358: 00078513 mv a0,a5
8000035c: d45ff0ef jal ra,800000a0 <vx_print_hex>
80000360: 810007b7 lui a5,0x81000
80000364: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
80000368: cfdff0ef jal ra,80000064 <vx_print_str>
8000036c: 810007b7 lui a5,0x81000
80000370: 12c78793 addi a5,a5,300 # 8100012c <arr+0x0>
80000374: 00c7a783 lw a5,12(a5)
80000378: 00078513 mv a0,a5
8000037c: d25ff0ef jal ra,800000a0 <vx_print_hex>
80000380: 810007b7 lui a5,0x81000
80000384: 09078513 addi a0,a5,144 # 81000090 <arr+0xffffff64>
80000388: cddff0ef jal ra,80000064 <vx_print_str>
8000038c: 00000013 nop
80000390: 01c12083 lw ra,28(sp)
80000394: 01812403 lw s0,24(sp)
80000398: 02010113 addi sp,sp,32
8000039c: 00008067 ret
800003a0 <main>:
800003a0: ff010113 addi sp,sp,-16
800003a4: 00112623 sw ra,12(sp)
800003a8: 00812423 sw s0,8(sp)
800003ac: 01010413 addi s0,sp,16
800003b0: 00100513 li a0,1
800003b4: c81ff0ef jal ra,80000034 <vx_tmc>
800003b8: d79ff0ef jal ra,80000130 <test_tmc>
800003bc: 810007b7 lui a5,0x81000
800003c0: 09478513 addi a0,a5,148 # 81000094 <arr+0xffffff68>
800003c4: ca1ff0ef jal ra,80000064 <vx_print_str>
800003c8: 00400513 li a0,4
800003cc: c69ff0ef jal ra,80000034 <vx_tmc>
800003d0: e41ff0ef jal ra,80000210 <test_divergence>
800003d4: 00100513 li a0,1
800003d8: c5dff0ef jal ra,80000034 <vx_tmc>
800003dc: 00000793 li a5,0
800003e0: 00078513 mv a0,a5
800003e4: 00c12083 lw ra,12(sp)
800003e8: 00812403 lw s0,8(sp)
800003ec: 01010113 addi sp,sp,16
800003f0: 00008067 ret
Disassembly of section .rodata:
@ -168,7 +316,7 @@ Disassembly of section .rodata:
8100002a: 0000 unimp
8100002c: 0062 c.slli zero,0x18
8100002e: 0000 unimp
81000030: 00000063 beqz zero,81000030 <main+0xfffed0>
81000030: 00000063 beqz zero,81000030 <main+0xfffc90>
81000034: 0064 addi s1,sp,12
81000036: 0000 unimp
81000038: 0065 c.nop 25
@ -199,86 +347,104 @@ Disassembly of section .rodata:
8100006e: 0000 unimp
81000070: 0062 c.slli zero,0x18
81000072: 0000 unimp
81000074: 00000063 beqz zero,81000074 <main+0xffff14>
81000074: 00000063 beqz zero,81000074 <main+0xfffcd4>
81000078: 0064 addi s1,sp,12
8100007a: 0000 unimp
8100007c: 0065 c.nop 25
8100007e: 0000 unimp
81000080: 0066 c.slli zero,0x19
81000082: 0000 unimp
81000084: 6574 flw fa3,76(a0)
81000086: 745f7473 csrrci s0,0x745,30
8100008a: 636d lui t1,0x1b
8100008c: 000a c.slli zero,0x2
8100008e: 0000 unimp
81000090: 000a c.slli zero,0x2
81000092: 0000 unimp
81000094: 6e32 flw ft8,12(sp)
81000096: 7765 lui a4,0xffff9
81000098: 7420 flw fs0,104(s0)
8100009a: 7365 lui t1,0xffff9
8100009c: 5f74 lw a3,124(a4)
8100009e: 6964 flw fs1,84(a0)
810000a0: 6576 flw fa0,92(sp)
810000a2: 6772 flw fa4,28(sp)
810000a4: 6e65 lui t3,0x19
810000a6: 000a6563 bltu s4,zero,810000b0 <hextoa+0x4>
Disassembly of section .data:
81000084 <hextoa>:
81000084: 0000 unimp
81000086: 8100 0x8100
81000088: 0004 0x4
8100008a: 8100 0x8100
8100008c: 0008 0x8
8100008e: 8100 0x8100
81000090: 000c 0xc
81000092: 8100 0x8100
81000094: 0010 0x10
81000096: 8100 0x8100
81000098: 0014 0x14
8100009a: 8100 0x8100
8100009c: 0018 0x18
8100009e: 8100 0x8100
810000a0: 001c 0x1c
810000a2: 8100 0x8100
810000a4: 0020 addi s0,sp,8
810000a6: 8100 0x8100
810000a8: 0024 addi s1,sp,8
810000aa: 8100 0x8100
810000ac: 0028 addi a0,sp,8
810000ac <hextoa>:
810000ac: 0000 unimp
810000ae: 8100 0x8100
810000b0: 002c addi a1,sp,8
810000b0: 0004 0x4
810000b2: 8100 0x8100
810000b4: 0030 addi a2,sp,8
810000b4: 0008 0x8
810000b6: 8100 0x8100
810000b8: 0034 addi a3,sp,8
810000b8: 000c 0xc
810000ba: 8100 0x8100
810000bc: 0038 addi a4,sp,8
810000bc: 0010 0x10
810000be: 8100 0x8100
810000c0: 003c addi a5,sp,8
810000c0: 0014 0x14
810000c2: 8100 0x8100
810000c4 <hextoa>:
810000c4: 0044 addi s1,sp,4
810000c4: 0018 0x18
810000c6: 8100 0x8100
810000c8: 0048 addi a0,sp,4
810000c8: 001c 0x1c
810000ca: 8100 0x8100
810000cc: 004c addi a1,sp,4
810000cc: 0020 addi s0,sp,8
810000ce: 8100 0x8100
810000d0: 0050 addi a2,sp,4
810000d0: 0024 addi s1,sp,8
810000d2: 8100 0x8100
810000d4: 0054 addi a3,sp,4
810000d4: 0028 addi a0,sp,8
810000d6: 8100 0x8100
810000d8: 0058 addi a4,sp,4
810000d8: 002c addi a1,sp,8
810000da: 8100 0x8100
810000dc: 005c addi a5,sp,4
810000dc: 0030 addi a2,sp,8
810000de: 8100 0x8100
810000e0: 0060 addi s0,sp,12
810000e0: 0034 addi a3,sp,8
810000e2: 8100 0x8100
810000e4: 0064 addi s1,sp,12
810000e4: 0038 addi a4,sp,8
810000e6: 8100 0x8100
810000e8: 0068 addi a0,sp,12
810000e8: 003c addi a5,sp,8
810000ea: 8100 0x8100
810000ec: 006c addi a1,sp,12
810000ec <hextoa>:
810000ec: 0044 addi s1,sp,4
810000ee: 8100 0x8100
810000f0: 0070 addi a2,sp,12
810000f0: 0048 addi a0,sp,4
810000f2: 8100 0x8100
810000f4: 0074 addi a3,sp,12
810000f4: 004c addi a1,sp,4
810000f6: 8100 0x8100
810000f8: 0078 addi a4,sp,12
810000f8: 0050 addi a2,sp,4
810000fa: 8100 0x8100
810000fc: 007c addi a5,sp,12
810000fc: 0054 addi a3,sp,4
810000fe: 8100 0x8100
81000100: 0080 addi s0,sp,64
81000100: 0058 addi a4,sp,4
81000102: 8100 0x8100
81000104: 005c addi a5,sp,4
81000106: 8100 0x8100
81000108: 0060 addi s0,sp,12
8100010a: 8100 0x8100
8100010c: 0064 addi s1,sp,12
8100010e: 8100 0x8100
81000110: 0068 addi a0,sp,12
81000112: 8100 0x8100
81000114: 006c addi a1,sp,12
81000116: 8100 0x8100
81000118: 0070 addi a2,sp,12
8100011a: 8100 0x8100
8100011c: 0074 addi a3,sp,12
8100011e: 8100 0x8100
81000120: 0078 addi a4,sp,12
81000122: 8100 0x8100
81000124: 007c addi a5,sp,12
81000126: 8100 0x8100
81000128: 0080 addi s0,sp,64
8100012a: 8100 0x8100
Disassembly of section .bss:
81000104 <arr>:
8100012c <arr>:
...
Disassembly of section .comment:

Binary file not shown.

View file

@ -1,32 +1,68 @@
:0200000480007A
:10000000130540006B000500F325000293952500C1
:10001000370600203306B6002320B600370600401E
:100020003306B600930650002320D6003706008022
:100030003306B600930670002320D6003706006012
:100040003306B600930670002320D6003706002042
:100050003306B60003270600130500006B000500F9
:100060006B10B500678000006B0005006780000022
:100070006B40B500678000006B2005006B3000000E
:10008000678000007325100267800000732500025E
:1000900067800000130141FF232011002322B100DB
:1000A0008345050063880500EF00C00113051500B6
:1000B0006FF01FFF83200100832541001301C10061
:1000C00067800000B702010023A0B2006780000033
:1000D000130101FE232E1100232C810013040102C1
:1000E0002326A4FEB70700810327C4FE13172700A9
:1000F00093874708B307F70083A707001385070016
:10010000EFF05FF9130000008320C1010324810197
:100110001301010267800000130101FE232E11006C
:10012000232C8100130401022326A4FE2324B4FE01
:100130000325C4FEEFF01FF6032584FEEFF05FF900
:10014000B707008113850704EFF0DFF41300000008
:100150008320C10103248101130101026780000093
:10016000130101FE232E1100232C81001304010230
:1001700013054000EFF05FEFEFF05FF12326A4FEE0
:100180000327C4FEB70700818326C4FE939626008A
:1001900093874710B387F60023A0E700930700007A
:1001A000138507008320C10103248101130101028B
:0401B0006780000064
:10000000130540006B000500F32500029395A50041
:1000100037F1FF6F3301B140130540006B0005005D
:10002000EF000038130500006B0005006B10B500F1
:10003000678000006B000500678000006B40B50022
:10004000678000006B200500678000006B300000B7
:10005000678000007325100267800000732500028E
:1000600067800000130141FF232011002322B1000B
:100070008345050063880500EF00C00113051500E6
:100080006FF01FFF83200100832541001301C10091
:1000900067800000B702010023A0B2006780000063
:1000A000130101FE232E1100232C810013040102F1
:1000B0002326A4FEB70700810327C4FE13172700D9
:1000C0009387C70AB307F70083A7070013850700C4
:1000D000EFF05FF9130000008320C10103248101C8
:1000E0001301010267800000130101FE232E11009D
:1000F000232C8100130401022326A4FE2324B4FE32
:100100000325C4FEEFF01FF6032584FEEFF05FF930
:10011000B707008113850704EFF0DFF41300000038
:100120008320C101032481011301010267800000C3
:10013000130101FE232E1100232C81001304010260
:10014000B707008113854708EFF0DFF11305400082
:10015000EFF05FEEEFF09FF02326A4FE0327C4FE2E
:10016000B70700818326C4FE939626009387C712A3
:10017000B387F60023A0E70013051000EFF09FEB14
:10018000B707008183A7C71213850700EFF05FF15F
:10019000B707008113850709EFF0DFECB70700818F
:1001A0009387C71283A7470013850700EFF05FEF1F
:1001B000B707008113850709EFF0DFEAB707008171
:1001C0009387C71283A7870013850700EFF05FEDC1
:1001D000B707008113850709EFF0DFE8B707008153
:1001E0009387C71283A7C70013850700EFF05FEB63
:1001F000B707008113850709EFF0DFE61300000061
:100200008320C101032481011301010267800000E2
:10021000130101FE232E1100232C8100130401027F
:10022000EFF0DFE32326A4FE8327C4FE93B7270065
:10023000A305F4FE8347B4FE13850700EFF09FE0AB
:100240008347B4FE638407068327C4FE93B7170071
:100250002305F4FE8347A4FE13850700EFF09FDE1D
:100260008347A4FE63820702B70700810327C4FE09
:10027000131727009387C712B307F7001307A000CF
:1002800023A0E7006F000002B70700810327C4FE28
:10029000131727009387C712B307F7001307B0009F
:1002A00023A0E700EFF09FDA6F0040068327C4FE2B
:1002B00093B73700A304F4FE834794FE1385070029
:1002C000EFF05FD8834794FE63820702B70700818F
:1002D0000327C4FE131727009387C712B307F7003D
:1002E0001307C00023A0E7006F000002B7070081DA
:1002F0000327C4FE131727009387C712B307F7001D
:100300001307D00023A0E700EFF05FD4EFF01FD475
:10031000B707008183A7C71213850700EFF05FD8E6
:10032000B707008113850709EFF0DFD3B707008116
:100330009387C71283A7470013850700EFF05FD6A6
:10034000B707008113850709EFF0DFD1B7070081F8
:100350009387C71283A7870013850700EFF05FD448
:10036000B707008113850709EFF0DFCFB7070081DA
:100370009387C71283A7C70013850700EFF05FD2EA
:10038000B707008113850709EFF0DFCD13000000E8
:100390008320C10103248101130101026780000051
:1003A000130101FF232611002324810013040101FE
:1003B00013051000EFF01FC8EFF09FD7B7070081BB
:1003C00013854709EFF01FCA13054000EFF09FC6E1
:1003D000EFF01FE413051000EFF0DFC593070000F6
:1003E000138507008320C10003248100130101014C
:0403F0006780000022
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
@ -36,14 +72,16 @@
:1000500033000000340000003500000036000000CE
:100060003700000038000000390000006100000087
:1000700062000000630000006400000065000000F2
:02008000660018
:100084000000008104000081080000810C00008150
:100094001000008114000081180000811C00008100
:1000A4002000008124000081280000812C000081B0
:1000B4003000008134000081380000813C00008160
:1000C40044000081480000814C0000815000008100
:1000D40054000081580000815C00008160000081B0
:1000E40064000081680000816C0000817000008160
:1000F40074000081780000817C0000818000008110
:1000800066000000746573745F746D630A0000009D
:100090000A000000326E657720746573745F6469CE
:0A00A00076657267656E63650A00FD
:1000AC000000008104000081080000810C00008128
:1000BC001000008114000081180000811C000081D8
:1000CC002000008124000081280000812C00008188
:1000DC003000008134000081380000813C00008138
:1000EC0044000081480000814C00008150000081D8
:1000FC0054000081580000815C0000816000008188
:10010C0064000081680000816C0000817000008137
:10011C0074000081780000817C00008180000081E7
:040000058000000077
:00000001FF

View file

@ -4,27 +4,86 @@
int arr[4];
int main()
{
// vx_print_str("Hello from runtime\n");
vx_tmc(4); // Activate 4 threads
void test_tmc()
{
vx_print_str("test_tmc\n");
vx_tmc(4);
unsigned tid = vx_threadID(); // Get TID
arr[tid] = tid;
// vx_tmc(1);
vx_tmc(1);
// vx_print_hex(arr[0]);
// vx_print_str("\n");
// vx_print_hex(arr[1]);
// vx_print_str("\n");
// vx_print_hex(arr[2]);
// vx_print_str("\n");
// vx_print_hex(arr[3]);
// vx_print_str("\n");
vx_print_hex(arr[0]);
vx_print_str("\n");
vx_print_hex(arr[1]);
vx_print_str("\n");
vx_print_hex(arr[2]);
vx_print_str("\n");
vx_print_hex(arr[3]);
vx_print_str("\n");
return;
}
// Control-divergence test: nested __if/__else/__endif regions that send each
// thread ID down a different path. Every thread stores a path-specific marker
// into arr[tid]:
//     tid == 0 -> 10,  tid == 1 -> 11,  tid == 2 -> 12,  tid >= 3 -> 13
// arr[0..3] is then printed, one hex value per line.
// (__if/__else/__endif are assumed to be the vx_split()/vx_join() wrapper
// macros from the intrinsics header — TODO confirm.)
// NOTE(review): this function never calls vx_tmc itself; it relies on the
// caller to have activated the threads, and the prints at the end run under
// whatever thread mask is in effect after the last __endif — confirm that is
// intended.
void test_divergence()
{
unsigned tid = vx_threadID(); // Get TID
// Outer region: tid 0-1 take the __if side, every other tid the __else side.
bool b = tid < 2;
__if (b)
{
// Inner region for tid 0-1: tid 0 writes 10, tid 1 writes 11.
bool c = tid < 1;
__if (c)
{
arr[tid] = 10;
}
__else
{
arr[tid] = 11;
}
__endif
}
__else
{
// Inner region for tid >= 2: tid 2 writes 12, anything larger writes 13.
bool c = tid < 3;
__if (c)
{
arr[tid] = 12;
}
__else
{
arr[tid] = 13;
}
__endif
}
__endif
// Dump the four result slots so the per-thread markers can be compared
// against the table in the header comment.
vx_print_hex(arr[0]);
vx_print_str("\n");
vx_print_hex(arr[1]);
vx_print_str("\n");
vx_print_hex(arr[2]);
vx_print_str("\n");
vx_print_hex(arr[3]);
vx_print_str("\n");
}
int main()
{
vx_tmc(1);
// TMC test
test_tmc();
// Control Divergence Test
vx_print_str("2new test_divergence\n");
vx_tmc(4);
test_divergence();
vx_tmc(1);
return 0;