mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
Fix for Single-Threaded
This commit is contained in:
parent
10ebfd7e24
commit
82ea79c680
16 changed files with 46894 additions and 46887 deletions
|
@ -311,8 +311,8 @@ module VX_bank
|
|||
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
|
||||
|
||||
|
||||
assign dfpq_pop = !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
|
||||
assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
|
||||
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
|
||||
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
|
||||
assign reqq_pop = !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !(reqq_hazard_st0 || (mrvq_valid_st0 && mrvq_hazard_st0)) && !is_fill_in_pipe;
|
||||
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0;
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ module VX_fill_invalidator
|
|||
|
||||
if (success_fill) begin
|
||||
success_found = 1;
|
||||
success_index = curr_fill[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0];
|
||||
success_index = curr_fill;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -73,12 +73,12 @@ module VX_tag_data_access
|
|||
);
|
||||
|
||||
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-2:0];
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-2:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-2:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-2:0];
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-2:0];
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||
|
||||
|
||||
wire qual_read_valid_st1;
|
||||
|
@ -94,6 +94,9 @@ module VX_tag_data_access
|
|||
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data;
|
||||
|
||||
|
||||
wire real_writefill = writefill_st1e && miss_st1e;
|
||||
|
||||
|
||||
wire fill_sent;
|
||||
wire invalidate_line;
|
||||
VX_tag_data_structure #(
|
||||
|
@ -128,13 +131,14 @@ module VX_tag_data_access
|
|||
|
||||
.invalidate (invalidate_line),
|
||||
.write_enable(use_write_enable),
|
||||
.write_fill (writefill_st1e),
|
||||
.write_fill (real_writefill),
|
||||
.write_addr (writeaddr_st1e),
|
||||
.write_data (use_write_data),
|
||||
.fill_sent (fill_sent)
|
||||
);
|
||||
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
|
||||
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
|
@ -145,7 +149,7 @@ module VX_tag_data_access
|
|||
|
||||
genvar curr_stage;
|
||||
generate
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-2; curr_stage = curr_stage + 1) begin
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
|
@ -158,13 +162,13 @@ module VX_tag_data_access
|
|||
endgenerate
|
||||
|
||||
|
||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-2] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
|
||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-2] && (FUNC_ID == `DFUNC_ID); // Dirty only applies in Dcache
|
||||
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-2]; // Tag is always the same in SM
|
||||
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
|
||||
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty is forced low for shared memory (SFUNC_ID); every other FUNC_ID passes the stored dirty bit through
|
||||
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
|
||||
|
||||
genvar curr_w;
|
||||
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-2][curr_w][31:0];
|
||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-2];
|
||||
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
|
||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
|
||||
|
||||
/////////////////////// LOAD LOGIC ///////////////////
|
||||
|
||||
|
@ -182,12 +186,12 @@ module VX_tag_data_access
|
|||
wire b2 = (byte_select == 2);
|
||||
wire b3 = (byte_select == 3);
|
||||
|
||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-2][0][31:0];
|
||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-2][1][31:0];
|
||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-2][2][31:0];
|
||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-2][3][31:0];
|
||||
wire[31:0] w0 = read_data_st1c[STAGE_1_CYCLES-1][0][31:0];
|
||||
wire[31:0] w1 = read_data_st1c[STAGE_1_CYCLES-1][1][31:0];
|
||||
wire[31:0] w2 = read_data_st1c[STAGE_1_CYCLES-1][2][31:0];
|
||||
wire[31:0] w3 = read_data_st1c[STAGE_1_CYCLES-1][3][31:0];
|
||||
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-2][block_offset][31:0];
|
||||
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
|
||||
|
||||
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
|
||||
b1 ? (data_unmod >> 8) :
|
||||
|
@ -234,7 +238,7 @@ module VX_tag_data_access
|
|||
wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
|
||||
|
||||
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e;
|
||||
wire force_write = writefill_st1e && valid_req_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
|
||||
wire force_write = writefill_st1e && valid_req_st1e && miss_st1e && (!use_read_valid_st1e || (use_read_valid_st1e && !miss_st1e));
|
||||
|
||||
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
|
||||
|
@ -262,7 +266,7 @@ module VX_tag_data_access
|
|||
|
||||
///////////////////////
|
||||
if (FUNC_ID == `LLFUNC_ID) begin
|
||||
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-2];
|
||||
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||
end else begin
|
||||
assign readword_st1e = data_Qual;
|
||||
end
|
||||
|
@ -272,7 +276,7 @@ module VX_tag_data_access
|
|||
assign readdata_st1e = use_read_data_st1e;
|
||||
assign readtag_st1e = use_read_tag_st1e;
|
||||
assign fill_sent = miss_st1e;
|
||||
assign fill_saw_dirty_st1e = force_write && dirty_st1e;
|
||||
assign fill_saw_dirty_st1e = force_write && dirty_st1e && miss_st1e;
|
||||
assign invalidate_line = is_snp_st1e && !miss_st1e;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -92,6 +92,7 @@ module VX_tag_data_structure
|
|||
end
|
||||
end else if (fill_sent) begin
|
||||
dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
end
|
||||
|
||||
if (invalidate) begin
|
||||
|
|
|
@ -127,7 +127,7 @@
|
|||
|
||||
`define NUMBER_CORES (`NUMBER_CORES_PER_CLUSTER*`NUMBER_CLUSTERS)
|
||||
|
||||
// `define SINGLE_CORE_BENCH 0
|
||||
`define SINGLE_CORE_BENCH 1
|
||||
`define GLOBAL_BLOCK_SIZE_BYTES 16
|
||||
// ========================================= Dcache Configurable Knobs =========================================
|
||||
|
||||
|
@ -141,7 +141,7 @@
|
|||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define DNUMBER_REQUESTS `NT
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define DSTAGE_1_CYCLES 2
|
||||
`define DSTAGE_1_CYCLES 1
|
||||
// Function ID
|
||||
`define DFUNC_ID 0
|
||||
|
||||
|
@ -172,7 +172,7 @@
|
|||
`define DFFSQ_SIZE 8
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define DFILL_INVALIDAOR_SIZE 16
|
||||
`define DFILL_INVALIDAOR_SIZE 0
|
||||
|
||||
// Dram knobs
|
||||
`define DSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
@ -192,7 +192,7 @@
|
|||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define INUMBER_REQUESTS 1
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define ISTAGE_1_CYCLES 2
|
||||
`define ISTAGE_1_CYCLES 1
|
||||
// Function ID
|
||||
`define IFUNC_ID 1
|
||||
|
||||
|
@ -214,16 +214,16 @@
|
|||
// Core Writeback Queue Size
|
||||
`define ICWBQ_SIZE `IREQQ_SIZE
|
||||
// Dram Writeback Queue Size
|
||||
`define IDWBQ_SIZE 0
|
||||
`define IDWBQ_SIZE 16
|
||||
// Dram Fill Req Queue Size
|
||||
`define IDFQQ_SIZE `IREQQ_SIZE
|
||||
// Lower Level Cache Hit Queue Size
|
||||
`define ILLVQ_SIZE 0
|
||||
`define ILLVQ_SIZE 16
|
||||
// Fill Forward SNP Queue
|
||||
`define IFFSQ_SIZE 8
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define IFILL_INVALIDAOR_SIZE 16
|
||||
`define IFILL_INVALIDAOR_SIZE 0
|
||||
|
||||
// Dram knobs
|
||||
`define ISIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
@ -244,7 +244,7 @@
|
|||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define SNUMBER_REQUESTS `NT
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define SSTAGE_1_CYCLES 2
|
||||
`define SSTAGE_1_CYCLES 1
|
||||
// Function ID
|
||||
`define SFUNC_ID 2
|
||||
|
||||
|
@ -258,24 +258,24 @@
|
|||
// Miss Reserv Queue Knob
|
||||
`define SMRVQ_SIZE `SREQQ_SIZE
|
||||
// Dram Fill Rsp Queue Size
|
||||
`define SDFPQ_SIZE 0
|
||||
`define SDFPQ_SIZE 16
|
||||
// Snoop Req Queue
|
||||
`define SSNRQ_SIZE 0
|
||||
`define SSNRQ_SIZE 16
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
`define SCWBQ_SIZE `SREQQ_SIZE
|
||||
// Dram Writeback Queue Size
|
||||
`define SDWBQ_SIZE 0
|
||||
`define SDWBQ_SIZE 16
|
||||
// Dram Fill Req Queue Size
|
||||
`define SDFQQ_SIZE 0
|
||||
`define SDFQQ_SIZE 16
|
||||
// Lower Level Cache Hit Queue Size
|
||||
`define SLLVQ_SIZE 0
|
||||
`define SLLVQ_SIZE 16
|
||||
// Fill Forward SNP Queue
|
||||
`define SFFSQ_SIZE 0
|
||||
`define SFFSQ_SIZE 16
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define SFILL_INVALIDAOR_SIZE 16
|
||||
`define SFILL_INVALIDAOR_SIZE 0
|
||||
|
||||
// Dram knobs
|
||||
`define SSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
@ -296,7 +296,7 @@
|
|||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define LLNUMBER_REQUESTS (2*`NUMBER_CORES_PER_CLUSTER)
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define LLSTAGE_1_CYCLES 2
|
||||
`define LLSTAGE_1_CYCLES 1
|
||||
// Function ID
|
||||
`define LLFUNC_ID 3
|
||||
|
||||
|
@ -322,12 +322,12 @@
|
|||
// Dram Fill Req Queue Size
|
||||
`define LLDFQQ_SIZE `LLREQQ_SIZE
|
||||
// Lower Level Cache Hit Queue Size
|
||||
`define LLLLVQ_SIZE 0
|
||||
`define LLLLVQ_SIZE 16
|
||||
// Fill Forward SNP Queue
|
||||
`define LLFFSQ_SIZE 8
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define LLFILL_INVALIDAOR_SIZE 16
|
||||
`define LLFILL_INVALIDAOR_SIZE 0
|
||||
|
||||
// Dram knobs
|
||||
`define LLSIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
@ -348,7 +348,7 @@
|
|||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define L3NUMBER_REQUESTS (`NUMBER_CLUSTERS)
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define L3STAGE_1_CYCLES 2
|
||||
`define L3STAGE_1_CYCLES 1
|
||||
// Function ID
|
||||
`define L3FUNC_ID 3
|
||||
|
||||
|
@ -379,7 +379,7 @@
|
|||
`define L3FFSQ_SIZE 8
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define L3FILL_INVALIDAOR_SIZE 16
|
||||
`define L3FILL_INVALIDAOR_SIZE 0
|
||||
|
||||
// Dram knobs
|
||||
`define L3SIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
`ifndef VX_DEFINE_SYNTH
|
||||
`define VX_DEFINE_SYNTH
|
||||
|
||||
`define NT 4
|
||||
`define NT 8
|
||||
`define NW 8
|
||||
`define NUMBER_CORES_PER_CLUSTER 2
|
||||
`define NUMBER_CORES_PER_CLUSTER 1
|
||||
`define NUMBER_CLUSTERS 1
|
||||
`define DCACHE_SIZE_BYTES 4096
|
||||
`define ICACHE_SIZE_BYTES 1024
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
module VX_generic_register
|
||||
#( parameter N = 1)
|
||||
#( parameter N = 1, parameter Valid = 1)
|
||||
(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -10,18 +10,26 @@ module VX_generic_register
|
|||
output wire[(N-1):0] out
|
||||
);
|
||||
|
||||
reg[(N-1):0] value;
|
||||
if (Valid == 0) begin
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
value <= 0;
|
||||
end else if (flush) begin
|
||||
value <= 0;
|
||||
end else if (~stall) begin
|
||||
value <= in;
|
||||
assign out = in;
|
||||
|
||||
end else begin
|
||||
|
||||
reg[(N-1):0] value;
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
value <= 0;
|
||||
end else if (flush) begin
|
||||
value <= 0;
|
||||
end else if (~stall) begin
|
||||
value <= in;
|
||||
end
|
||||
end
|
||||
|
||||
assign out = value;
|
||||
|
||||
end
|
||||
|
||||
assign out = value;
|
||||
|
||||
endmodule
|
|
@ -13,10 +13,11 @@ module VX_gpgpu_inst (
|
|||
wire is_split = (VX_gpu_inst_req.is_split);
|
||||
|
||||
wire[`NT_M1:0] tmc_new_mask;
|
||||
wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0];
|
||||
genvar curr_t;
|
||||
generate
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[curr_t] = curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -30,13 +31,14 @@ module VX_gpgpu_inst (
|
|||
assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);
|
||||
|
||||
|
||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||
wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
genvar curr_w;
|
||||
generate
|
||||
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -218,9 +218,11 @@ module VX_warp_scheduler (
|
|||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1'b1;
|
||||
// warp_lock <= {`NW{1'b1}};
|
||||
end
|
||||
if (|icache_stage_valids && !stall) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
// warp_lock <= {`NW{1'b0}};
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -292,7 +294,7 @@ module VX_warp_scheduler (
|
|||
|
||||
assign hazard = (should_jal || should_bra) && schedule;
|
||||
|
||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule];
|
||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
||||
|
||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
|
||||
COMP = riscv32-unknown-elf-gcc
|
||||
COMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-gcc
|
||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld -ffreestanding -nostdlib
|
||||
|
||||
DMP = riscv32-unknown-elf-objdump
|
||||
CPY = riscv32-unknown-elf-objcopy
|
||||
DMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-objdump
|
||||
CPY = /opt/riscv-new/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
|
@ -13,7 +13,7 @@ VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
|||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_TEST = ../../tests/tests.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
LIBS = ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
LIBS = /opt/riscv-new/drops/riscv32-unknown-elf/lib/libc.a /opt/riscv-new/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
VX_MAIN = vx_simple_main
|
||||
|
||||
|
|
|
@ -52,6 +52,18 @@ int main()
|
|||
// Main is called with all threads active of warp 0
|
||||
vx_tmc(1);
|
||||
|
||||
vx_print_str("Let's start...\n");
|
||||
unsigned what[36];
|
||||
for (int i = 0; i < 36; i++)
|
||||
{
|
||||
what[i] = i;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 36; i++)
|
||||
{
|
||||
vx_printf("Value: ", what[i]);
|
||||
}
|
||||
|
||||
|
||||
vx_print_str("Simple Main\n");
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -20,22 +20,22 @@ _start:
|
|||
# Initialize SP
|
||||
# la sp, __stack_top
|
||||
la a1, vx_set_sp
|
||||
li a0, 32
|
||||
li a0, 4
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
jal vx_set_sp
|
||||
li a0, 1
|
||||
.word 0x0005006b # tmc 1
|
||||
# li a0, 1
|
||||
# .word 0x0005006b # tmc 1
|
||||
# Initialize global pointer
|
||||
# call __cxx_global_var_init
|
||||
# Clear the bss segment
|
||||
la a0, _edata
|
||||
la a2, _end
|
||||
sub a2, a2, a0
|
||||
li a1, 0
|
||||
call memset
|
||||
la a0, __libc_fini_array # Register global termination functions
|
||||
call atexit # to be called upon exit
|
||||
call __libc_init_array # Run global initialization functions
|
||||
# la a0, _edata
|
||||
# la a2, _end
|
||||
# sub a2, a2, a0
|
||||
# li a1, 0
|
||||
# call memset
|
||||
# la a0, __libc_fini_array # Register global termination functions
|
||||
# call atexit # to be called upon exit
|
||||
# call __libc_init_array # Run global initialization functions
|
||||
# li a0, 4
|
||||
# .word 0x0005006b # tmc 4
|
||||
call main
|
||||
|
@ -46,7 +46,7 @@ _start:
|
|||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
li a0, 32
|
||||
li a0, 4
|
||||
.word 0x0005006b # tmc 4
|
||||
|
||||
.option push
|
||||
|
@ -55,7 +55,7 @@ vx_set_sp:
|
|||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
csrr a3, 0x22 # get wid
|
||||
slli a3, a3, 0x1a # a3 = wid << 26 (shift the warp id left by a constant 0x1a = 26 bits)
|
||||
csrr a2, 0x20 # get tid
|
||||
slli a1, a2, 10 # multiply tid by 1024
|
||||
|
|
|
@ -13,6 +13,7 @@ void test_tmc()
|
|||
vx_tmc(4);
|
||||
|
||||
unsigned tid = vx_threadID(); // Get TID
|
||||
|
||||
tmc_array[tid] = tid;
|
||||
|
||||
vx_tmc(1);
|
||||
|
@ -85,6 +86,7 @@ void simple_kernel()
|
|||
|
||||
wsapwn_arr[wid] = wid;
|
||||
|
||||
wid = vx_warpID();
|
||||
if (wid != 0)
|
||||
{
|
||||
vx_tmc(0);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue