fixed warp_sched lock bug

This commit is contained in:
Blaise Tine 2020-05-28 08:52:20 -04:00
parent 98b98b1005
commit 611ceb000a
21 changed files with 85757 additions and 3601 deletions

View file

@ -48,7 +48,7 @@
`endif
`ifndef IO_BUS_ADDR_COUT
`define IO_BUS_ADDR_COUT 30'h3FFFFFFF
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
`endif
`ifndef L2_ENABLE

View file

@ -10,14 +10,13 @@ module VX_decode(
VX_wstall_if wstall_if,
VX_join_if join_if
);
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
wire in_valid = (| fd_inst_meta_de.valid);
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[6:0] curr_opcode;
wire is_itype;
@ -115,7 +114,7 @@ module VX_decode(
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
assign join_if.is_join = is_join && (| in_valid);
assign join_if.is_join = is_join && in_valid;
assign join_if.join_warp_num = in_warp_num;
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
@ -170,23 +169,23 @@ module VX_decode(
case (curr_opcode)
`INST_JAL:
begin
temp_jal = 1'b1 && (| in_valid);
temp_jal = 1'b1 && in_valid;
temp_jal_offset = jal_1_offset;
end
`INST_JALR:
begin
temp_jal = 1'b1 && (| in_valid);
temp_jal = 1'b1 && in_valid;
temp_jal_offset = jal_2_offset;
end
`INST_SYS:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && (| in_valid)));
temp_jal = jal_sys_jal && (| in_valid);
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid));
temp_jal = jal_sys_jal && in_valid;
temp_jal_offset = jal_sys_off;
end
default:
begin
temp_jal = 1'b0 && (| in_valid);
temp_jal = 1'b0 && in_valid;
temp_jal_offset = 32'hdeadbeef;
end
endcase
@ -228,7 +227,7 @@ module VX_decode(
case (curr_opcode)
`INST_B: begin
// $display("BRANCH IN DECODE");
temp_branch_stall = 1'b1 && (| in_valid);
temp_branch_stall = 1'b1 && in_valid;
case (func3)
3'h0: temp_branch_type = `BR_EQ;
3'h1: temp_branch_type = `BR_NE;
@ -241,22 +240,22 @@ module VX_decode(
end
`INST_JAL: begin
temp_branch_type = `BR_NO;
temp_branch_stall = 1'b1 && (| in_valid);
temp_branch_stall = 1'b1 && in_valid;
end
`INST_JALR: begin
temp_branch_type = `BR_NO;
temp_branch_stall = 1'b1 && (| in_valid);
temp_branch_stall = 1'b1 && in_valid;
end
default: begin
temp_branch_type = `BR_NO;
temp_branch_stall = 1'b0 && (| in_valid);
temp_branch_stall = 1'b0 && in_valid;
end
endcase
end
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (| in_valid);
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && in_valid;
assign wstall_if.warp_num = in_warp_num;
always @(*) begin

View file

@ -148,12 +148,6 @@
// Cache ID
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
// Core request address bits
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
// Core request byte enable bits
`define DCORE_BYTEEN_WIDTH `DWORD_SIZE
// TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`DCREQ_SIZE)

View file

@ -8,7 +8,7 @@ module VX_fetch (
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
input wire icache_stage_response,
output wire busy,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
@ -52,7 +52,7 @@ module VX_fetch (
.wstall_warp_num (wstall_if.warp_num),
// Lock/release Stuff
.icache_stage_valids(icache_stage_valids),
.icache_stage_response(icache_stage_response),
.icache_stage_wid (icache_stage_wid),
// Join

View file

@ -31,7 +31,7 @@ module VX_front_end #(
wire icache_stage_delay;
wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids;
wire icache_stage_response;
VX_wstall_if wstall_if();
VX_join_if join_if();
@ -40,7 +40,7 @@ module VX_front_end #(
.clk (clk),
.reset (reset),
.icache_stage_wid (icache_stage_wid),
.icache_stage_valids(icache_stage_valids),
.icache_stage_response(icache_stage_response),
.wstall_if (wstall_if),
.join_if (join_if),
.schedule_delay (schedule_delay),
@ -67,7 +67,7 @@ module VX_front_end #(
.reset (reset),
.total_freeze (total_freeze),
.icache_stage_delay (icache_stage_delay),
.icache_stage_valids(icache_stage_valids),
.icache_stage_response(icache_stage_response),
.icache_stage_wid (icache_stage_wid),
.fe_inst_meta_fi (fe_inst_meta_fi2),
.fe_inst_meta_id (fe_inst_meta_id),

View file

@ -58,8 +58,14 @@ module VX_gpr_wrapper (
.a_reg_data (temp_a_reg_data[i]),
.b_reg_data (temp_b_reg_data[i])
);
/*always_ff @(posedge clk) begin
if (valid_write_request && ((writeback_if.wb != 0))) begin
$display("%t: GPR%01d$: wid=%0d, rd=%0d, data=%0h", $time, 0, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end
end*/
end
endgenerate
endgenerate
endmodule

View file

@ -8,7 +8,7 @@ module VX_gpu_inst (
VX_warp_ctl_if warp_ctl_if
);
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
wire is_split = (gpu_inst_req_if.is_split);
wire is_split = gpu_inst_req_if.is_split;
wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
@ -23,12 +23,12 @@ module VX_gpu_inst (
wire valid_inst = (| curr_valids);
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.change_mask = gpu_inst_req_if.is_tmc && valid_inst;
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (0 == warp_ctl_if.thread_mask);
wire wspawn = gpu_inst_req_if.is_wspawn;
wire wspawn = gpu_inst_req_if.is_wspawn && valid_inst;
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active;

View file

@ -8,7 +8,7 @@ module VX_icache_stage #(
input wire total_freeze,
output wire icache_stage_delay,
output wire[`NW_BITS-1:0] icache_stage_wid,
output wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire icache_stage_response,
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
@ -25,28 +25,41 @@ module VX_icache_stage #(
wire [`ICORE_TAG_WIDTH-1:0] mem_rsp_tag = icache_rsp_if.core_rsp_tag;
`DEBUG_END
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
wire mrq_full;
wire mrq_push = (| icache_req_if.core_req_valid) && icache_req_if.core_req_ready;
wire mrq_pop = (| icache_rsp_if.core_rsp_valid) && icache_rsp_if.core_rsp_ready;
wire mrq_push = icache_req_if.core_req_valid && icache_req_if.core_req_ready;
wire mrq_pop = icache_rsp_if.core_rsp_valid && icache_rsp_if.core_rsp_ready;
assign mrq_read_addr = icache_rsp_if.core_rsp_tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
VX_indexable_queue #(
.DATAW (32 + `NW_BITS),
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
.SIZE (`ICREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num}),
.write_data ({mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num})
);
.read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num})
);
always @(posedge clk) begin
if (reset) begin
//--
end else begin
if (mrq_push) begin
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
end
if (mrq_pop) begin
assert(mrq_read_addr == dbg_mrq_write_addr);
end
end
end
// Icache Request
assign icache_req_if.core_req_valid = valid_inst && ~mrq_full;
@ -67,22 +80,12 @@ module VX_icache_stage #(
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
assign icache_stage_response = mrq_pop;
assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
// Can't accept new response
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
always @(posedge clk) begin
if (reset) begin
//--
end else begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
end
end
end
`ifdef DBG_PRINT_CORE_ICACHE
always_ff @(posedge clk) begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin

View file

@ -75,26 +75,20 @@ module VX_lsu_unit #(
wire[2:0] core_rsp_mem_read;
for (i = 0; i < `NUM_THREADS; ++i) begin
always @(*) begin
always @(*) begin
case (core_req_rw ? use_mem_write[1:0] : use_mem_read[1:0])
2'b0: begin
case (use_address[i][1:0])
1: mem_req_offset[i] = 8;
2: mem_req_offset[i] = 16;
3: mem_req_offset[i] = 24;
default : mem_req_offset[i] = 0;
1: mem_req_offset[i] = 8;
2: mem_req_offset[i] = 16;
3: mem_req_offset[i] = 24;
default: mem_req_offset[i] = 0;
endcase
end
2'b1: begin
case (use_address[i][1:0])
2: mem_req_offset[i] = 16;
default : mem_req_offset[i] = 0;
endcase
mem_req_offset[i] = (2 == use_address[i][1:0]) ? 16 : 0;
end
default : begin
mem_req_offset[i] = 0;
end
default: mem_req_offset[i] = 0;
endcase
end
@ -105,7 +99,7 @@ module VX_lsu_unit #(
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
wire mrq_full;
wire mrq_push = (0 == core_req_rw) && (| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready;
@ -117,6 +111,21 @@ module VX_lsu_unit #(
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_next);
VX_indexable_queue #(
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 5) + `BYTE_EN_BITS + 5 + `NW_BITS),
.SIZE (`DCREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({mrq_write_addr, use_pc, use_wb, mem_req_offset, use_mem_read, use_rd, use_warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
);
always @(posedge clk) begin
if (reset) begin
//--
@ -126,25 +135,11 @@ module VX_lsu_unit #(
end
if (mrq_pop_part) begin
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_next;
assert(mrq_read_addr == dbg_mrq_write_addr);
end
end
end
VX_indexable_queue #(
.DATAW (32 + 2 + (`NUM_THREADS * 5) + `BYTE_EN_BITS + 5 + `NW_BITS),
.SIZE (`DCREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({use_pc, use_wb, mem_req_offset, use_mem_read, use_rd, use_warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
);
// Core Request
assign dcache_req_if.core_req_valid = use_valid & {`NUM_THREADS{~mrq_full}};

View file

@ -61,8 +61,10 @@ module VX_scheduler (
end
end else begin
if (valid_wb) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
if (0 == valid_wb_new_mask) begin
assert(count_valid != 0);
count_valid <= count_valid - 1;
end
end

View file

@ -60,7 +60,7 @@ module VX_warp_sched (
output wire scheduled_warp,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids
input wire icache_stage_response
);
wire update_use_wspawn;
wire update_visible_active;
@ -209,7 +209,7 @@ module VX_warp_sched (
// Branch
if (branch_valid) begin
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
warp_stalled[branch_warp_num] <= 0;
end
@ -218,7 +218,7 @@ module VX_warp_sched (
warp_lock[warp_num] <= 1'b1;
// warp_lock <= {`NUM_WARPS{1'b1}};
end
if ((| icache_stage_valids) && !stall) begin
if (icache_stage_response) begin
warp_lock[icache_stage_wid] <= 1'b0;
// warp_lock <= {`NUM_WARPS{1'b0}};
end
@ -251,7 +251,7 @@ module VX_warp_sched (
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || is_join);
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
@ -305,7 +305,7 @@ module VX_warp_sched (
assign new_pc = warp_pc + 4;
assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock)) : visible_active;
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
// Choosing a warp to schedule
VX_priority_encoder #(

View file

@ -51,8 +51,8 @@ module Vortex #(
// I/O request
output wire io_req_valid,
output wire io_req_rw,
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
output wire[3:0] io_req_byteen,
output wire[29:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,

View file

@ -37,8 +37,8 @@ module Vortex_Cluster #(
// I/O request
output wire io_req_valid,
output wire io_req_rw,
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
output wire[3:0] io_req_byteen,
output wire[29:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
@ -91,8 +91,8 @@ module Vortex_Cluster #(
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_req_valid;
wire[`NUM_CORES-1:0] per_core_io_req_rw;
wire[`NUM_CORES-1:0][`DCORE_BYTEEN_WIDTH-1:0] per_core_io_req_byteen;
wire[`NUM_CORES-1:0][`DCORE_ADDR_WIDTH-1:0] per_core_io_req_addr;
wire[`NUM_CORES-1:0][3:0] per_core_io_req_byteen;
wire[`NUM_CORES-1:0][29:0] per_core_io_req_addr;
wire[`NUM_CORES-1:0][31:0] per_core_io_req_data;
wire[`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag;

View file

@ -35,8 +35,8 @@ module Vortex_Socket (
// I/O request
output wire io_req_valid,
output wire io_req_rw,
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
output wire[3:0] io_req_byteen,
output wire[29:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
@ -125,8 +125,8 @@ module Vortex_Socket (
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire[`NUM_CLUSTERS-1:0][`DCORE_BYTEEN_WIDTH-1:0] per_cluster_io_req_byteen;
wire[`NUM_CLUSTERS-1:0][`DCORE_ADDR_WIDTH-1:0] per_cluster_io_req_addr;
wire[`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
wire[`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire[`NUM_CLUSTERS-1:0][`DCORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;

View file

@ -33,67 +33,65 @@ module VX_snp_forwarder #(
input wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
);
reg [`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH-1:0] pending_reqs [SNRQ_SIZE-1:0];
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
reg [`LOG2UP(SNRQ_SIZE):0] rd_ptr, wr_ptr;
reg [`REQS_BITS-1:0] fwdin_sel;
wire [`LOG2UP(SNRQ_SIZE)-1:0] rd_a, wr_a;
wire enqueue, dequeue, empty, full;
wire fwdout_ready;
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
wire sfq_push, sfq_pop, sfq_full;
wire fwdin_valid;
wire [SNP_FWD_TAG_WIDTH-1:0] fwdin_tag;
wire fwdin_ready;
wire fwdin_taken;
assign fwdout_ready = (& snp_fwdout_ready);
wire fwdin_ready = snp_rsp_ready;
wire fwdin_taken = fwdin_valid && fwdin_ready;
assign snp_req_ready = !full && fwdout_ready;
wire fwdout_ready = (& snp_fwdout_ready);
assign rd_a = rd_ptr[`LOG2UP(SNRQ_SIZE)-1:0];
assign wr_a = wr_ptr[`LOG2UP(SNRQ_SIZE)-1:0];
assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[sfq_read_addr]); // send response
assign sfq_read_addr = fwdin_tag[`LOG2UP(SNRQ_SIZE)-1:0];
assign sfq_push = snp_req_valid && fwdout_ready;
assign sfq_pop = snp_rsp_valid;
VX_indexable_queue #(
.DATAW (`LOG2UP(SNRQ_SIZE) + `DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) snp_fwd_queue (
.clk (clk),
.reset (reset),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_tag}),
.write_addr (sfq_write_addr),
.push (sfq_push),
.full (sfq_full),
.pop (sfq_pop),
.read_addr (sfq_read_addr),
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_tag})
);
always @(posedge clk) begin
if (reset) begin
//--
end else begin
if (sfq_push) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
end
if (fwdin_taken) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
assert(sfq_read_addr == dbg_sfq_write_addr);
end
end
end
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = enqueue && fwdout_ready;
assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_tag[i] = wr_a;
assign snp_fwdout_tag[i] = sfq_write_addr;
end
assign fwdin_ready = snp_rsp_ready;
assign fwdin_taken = fwdin_valid && fwdin_ready;
assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[fwdin_tag]); // send response
assign {snp_rsp_addr, snp_rsp_tag} = pending_reqs[fwdin_tag];
assign empty = (wr_ptr == rd_ptr);
assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SNRQ_SIZE)] != rd_ptr[`LOG2UP(SNRQ_SIZE)]);
assign enqueue = snp_req_valid && snp_req_ready;
assign dequeue = !empty && (0 == pending_cntrs[rd_a]);
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;
wr_ptr <= 0;
end else begin
if (enqueue) begin
pending_reqs[wr_a] <= {snp_req_addr, snp_req_tag};
pending_cntrs[wr_a] <= NUM_REQUESTS;
wr_ptr <= wr_ptr + 1;
end
if (dequeue) begin
rd_ptr <= rd_ptr + 1;
end
if (fwdin_taken) begin
pending_cntrs[fwdin_tag] <= pending_cntrs[fwdin_tag] - 1;
end
end
end
assign snp_req_ready = !sfq_full && fwdout_ready;
always @(posedge clk) begin
if (reset) begin
@ -104,7 +102,7 @@ module VX_snp_forwarder #(
end
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
for (i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));

View file

@ -204,7 +204,7 @@ void Simulator::dbus_driver() {
void Simulator::io_driver() {
if (vortex_->io_req_valid
&& vortex_->io_req_rw
&& vortex_->io_req_addr == IO_BUS_ADDR_COUT) {
&& ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write;
std::cout << c;

View file

@ -23,7 +23,7 @@ VX_MAIN = vx_simple_main
VX_SRCS = vx_simple_main.c tests.c
all: HEX DUMP ELF
all: HEX DUMP ELF BIN
DUMP: ELF
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
@ -31,5 +31,8 @@ DUMP: ELF
HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
BIN: ELF
$(CPY) -O binary $(VX_MAIN).elf $(VX_MAIN).bin
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) $(LIBS) -Iinclude -o $(VX_MAIN).elf

View file

@ -50,7 +50,7 @@ void mat_add_kernel(void * void_arguments)
int main()
{
/*// ensure single thread
// ensure single thread
vx_tmc(1);
vx_print_str("Let's start... (This might take a while)\n");
@ -87,7 +87,7 @@ int main()
vx_print_str("Simple Main\n");
// TMC test
test_tmc();*/
test_tmc();
// Control Divergence Test
vx_print_str("test_divergence\n");
@ -95,7 +95,6 @@ int main()
test_divergence();
vx_tmc(1);
/*
// Test wspawn
vx_print_str("test_wspawn\n");
test_wsapwn();
@ -143,7 +142,7 @@ int main()
vx_print_str(" ");
}
vx_print_str("\n");
}*/
}
return 0;
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff