mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
fixed warp_sched lock bug
This commit is contained in:
parent
98b98b1005
commit
611ceb000a
21 changed files with 85757 additions and 3601 deletions
|
@ -48,7 +48,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef IO_BUS_ADDR_COUT
|
||||
`define IO_BUS_ADDR_COUT 30'h3FFFFFFF
|
||||
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
|
||||
`endif
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
|
|
|
@ -10,14 +10,13 @@ module VX_decode(
|
|||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire in_valid = (| fd_inst_meta_de.valid);
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
wire is_itype;
|
||||
|
@ -115,7 +114,7 @@ module VX_decode(
|
|||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
assign join_if.is_join = is_join && (| in_valid);
|
||||
assign join_if.is_join = is_join && in_valid;
|
||||
assign join_if.join_warp_num = in_warp_num;
|
||||
|
||||
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
|
||||
|
@ -170,23 +169,23 @@ module VX_decode(
|
|||
case (curr_opcode)
|
||||
`INST_JAL:
|
||||
begin
|
||||
temp_jal = 1'b1 && (| in_valid);
|
||||
temp_jal = 1'b1 && in_valid;
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`INST_JALR:
|
||||
begin
|
||||
temp_jal = 1'b1 && (| in_valid);
|
||||
temp_jal = 1'b1 && in_valid;
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`INST_SYS:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (| in_valid)));
|
||||
temp_jal = jal_sys_jal && (| in_valid);
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid));
|
||||
temp_jal = jal_sys_jal && in_valid;
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0 && (| in_valid);
|
||||
temp_jal = 1'b0 && in_valid;
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
endcase
|
||||
|
@ -228,7 +227,7 @@ module VX_decode(
|
|||
case (curr_opcode)
|
||||
`INST_B: begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
temp_branch_stall = 1'b1 && in_valid;
|
||||
case (func3)
|
||||
3'h0: temp_branch_type = `BR_EQ;
|
||||
3'h1: temp_branch_type = `BR_NE;
|
||||
|
@ -241,22 +240,22 @@ module VX_decode(
|
|||
end
|
||||
`INST_JAL: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
temp_branch_stall = 1'b1 && in_valid;
|
||||
end
|
||||
`INST_JALR: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
temp_branch_stall = 1'b1 && in_valid;
|
||||
end
|
||||
default: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = 1'b0 && (| in_valid);
|
||||
temp_branch_stall = 1'b0 && in_valid;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
|
||||
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (| in_valid);
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && in_valid;
|
||||
assign wstall_if.warp_num = in_warp_num;
|
||||
|
||||
always @(*) begin
|
||||
|
|
|
@ -148,12 +148,6 @@
|
|||
// Cache ID
|
||||
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
|
||||
|
||||
// Core request address bits
|
||||
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
|
||||
|
||||
// Core request byte enable bits
|
||||
`define DCORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`DCREQ_SIZE)
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ module VX_fetch (
|
|||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
input wire icache_stage_response,
|
||||
output wire busy,
|
||||
VX_jal_rsp_if jal_rsp_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
@ -52,7 +52,7 @@ module VX_fetch (
|
|||
.wstall_warp_num (wstall_if.warp_num),
|
||||
|
||||
// Lock/release Stuff
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_response(icache_stage_response),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
||||
// Join
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_front_end #(
|
|||
wire icache_stage_delay;
|
||||
|
||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
wire icache_stage_response;
|
||||
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
|
@ -40,7 +40,7 @@ module VX_front_end #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_response(icache_stage_response),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
|
@ -67,7 +67,7 @@ module VX_front_end #(
|
|||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_response(icache_stage_response),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
|
|
|
@ -58,8 +58,14 @@ module VX_gpr_wrapper (
|
|||
.a_reg_data (temp_a_reg_data[i]),
|
||||
.b_reg_data (temp_b_reg_data[i])
|
||||
);
|
||||
|
||||
/*always_ff @(posedge clk) begin
|
||||
if (valid_write_request && ((writeback_if.wb != 0))) begin
|
||||
$display("%t: GPR%01d$: wid=%0d, rd=%0d, data=%0h", $time, 0, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
|
||||
end
|
||||
end*/
|
||||
end
|
||||
endgenerate
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ module VX_gpu_inst (
|
|||
VX_warp_ctl_if warp_ctl_if
|
||||
);
|
||||
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
|
||||
wire is_split = (gpu_inst_req_if.is_split);
|
||||
wire is_split = gpu_inst_req_if.is_split;
|
||||
|
||||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
|
||||
|
@ -23,12 +23,12 @@ module VX_gpu_inst (
|
|||
wire valid_inst = (| curr_valids);
|
||||
|
||||
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
|
||||
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
|
||||
assign warp_ctl_if.change_mask = gpu_inst_req_if.is_tmc && valid_inst;
|
||||
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
||||
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (0 == warp_ctl_if.thread_mask);
|
||||
|
||||
wire wspawn = gpu_inst_req_if.is_wspawn;
|
||||
wire wspawn = gpu_inst_req_if.is_wspawn && valid_inst;
|
||||
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
|
||||
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
|
|
@ -8,7 +8,7 @@ module VX_icache_stage #(
|
|||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
output wire icache_stage_response,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_inst_meta_if fe_inst_meta_id,
|
||||
|
||||
|
@ -25,28 +25,41 @@ module VX_icache_stage #(
|
|||
wire [`ICORE_TAG_WIDTH-1:0] mem_rsp_tag = icache_rsp_if.core_rsp_tag;
|
||||
`DEBUG_END
|
||||
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire mrq_full;
|
||||
|
||||
wire mrq_push = (| icache_req_if.core_req_valid) && icache_req_if.core_req_ready;
|
||||
wire mrq_pop = (| icache_rsp_if.core_rsp_valid) && icache_rsp_if.core_rsp_ready;
|
||||
wire mrq_push = icache_req_if.core_req_valid && icache_req_if.core_req_ready;
|
||||
wire mrq_pop = icache_rsp_if.core_rsp_valid && icache_rsp_if.core_rsp_ready;
|
||||
|
||||
assign mrq_read_addr = icache_rsp_if.core_rsp_tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
|
||||
|
||||
VX_indexable_queue #(
|
||||
.DATAW (32 + `NW_BITS),
|
||||
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
|
||||
.SIZE (`ICREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num}),
|
||||
.write_data ({mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num})
|
||||
);
|
||||
.read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//--
|
||||
end else begin
|
||||
if (mrq_push) begin
|
||||
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
end
|
||||
if (mrq_pop) begin
|
||||
assert(mrq_read_addr == dbg_mrq_write_addr);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.core_req_valid = valid_inst && ~mrq_full;
|
||||
|
@ -67,22 +80,12 @@ module VX_icache_stage #(
|
|||
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
|
||||
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
|
||||
|
||||
assign icache_stage_response = mrq_pop;
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
||||
|
||||
// Can't accept new response
|
||||
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//--
|
||||
end else begin
|
||||
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
|
||||
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always_ff @(posedge clk) begin
|
||||
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
|
||||
|
|
|
@ -75,26 +75,20 @@ module VX_lsu_unit #(
|
|||
wire[2:0] core_rsp_mem_read;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
case (core_req_rw ? use_mem_write[1:0] : use_mem_read[1:0])
|
||||
2'b0: begin
|
||||
case (use_address[i][1:0])
|
||||
1: mem_req_offset[i] = 8;
|
||||
2: mem_req_offset[i] = 16;
|
||||
3: mem_req_offset[i] = 24;
|
||||
default : mem_req_offset[i] = 0;
|
||||
1: mem_req_offset[i] = 8;
|
||||
2: mem_req_offset[i] = 16;
|
||||
3: mem_req_offset[i] = 24;
|
||||
default: mem_req_offset[i] = 0;
|
||||
endcase
|
||||
end
|
||||
2'b1: begin
|
||||
case (use_address[i][1:0])
|
||||
2: mem_req_offset[i] = 16;
|
||||
default : mem_req_offset[i] = 0;
|
||||
endcase
|
||||
mem_req_offset[i] = (2 == use_address[i][1:0]) ? 16 : 0;
|
||||
end
|
||||
default : begin
|
||||
mem_req_offset[i] = 0;
|
||||
end
|
||||
default: mem_req_offset[i] = 0;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -105,7 +99,7 @@ module VX_lsu_unit #(
|
|||
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire mrq_full;
|
||||
|
||||
wire mrq_push = (0 == core_req_rw) && (| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready;
|
||||
|
@ -117,6 +111,21 @@ module VX_lsu_unit #(
|
|||
|
||||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_next);
|
||||
|
||||
VX_indexable_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 5) + `BYTE_EN_BITS + 5 + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, use_pc, use_wb, mem_req_offset, use_mem_read, use_rd, use_warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//--
|
||||
|
@ -126,25 +135,11 @@ module VX_lsu_unit #(
|
|||
end
|
||||
if (mrq_pop_part) begin
|
||||
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_next;
|
||||
assert(mrq_read_addr == dbg_mrq_write_addr);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_indexable_queue #(
|
||||
.DATAW (32 + 2 + (`NUM_THREADS * 5) + `BYTE_EN_BITS + 5 + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({use_pc, use_wb, mem_req_offset, use_mem_read, use_rd, use_warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
|
||||
);
|
||||
|
||||
// Core Request
|
||||
|
||||
assign dcache_req_if.core_req_valid = use_valid & {`NUM_THREADS{~mrq_full}};
|
||||
|
|
|
@ -61,8 +61,10 @@ module VX_scheduler (
|
|||
end
|
||||
end else begin
|
||||
if (valid_wb) begin
|
||||
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
if (0 == valid_wb_new_mask) begin
|
||||
assert(count_valid != 0);
|
||||
count_valid <= count_valid - 1;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -60,7 +60,7 @@ module VX_warp_sched (
|
|||
output wire scheduled_warp,
|
||||
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids
|
||||
input wire icache_stage_response
|
||||
);
|
||||
wire update_use_wspawn;
|
||||
wire update_visible_active;
|
||||
|
@ -209,7 +209,7 @@ module VX_warp_sched (
|
|||
|
||||
// Branch
|
||||
if (branch_valid) begin
|
||||
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
|
||||
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
|
||||
warp_stalled[branch_warp_num] <= 0;
|
||||
end
|
||||
|
||||
|
@ -218,7 +218,7 @@ module VX_warp_sched (
|
|||
warp_lock[warp_num] <= 1'b1;
|
||||
// warp_lock <= {`NUM_WARPS{1'b1}};
|
||||
end
|
||||
if ((| icache_stage_valids) && !stall) begin
|
||||
if (icache_stage_response) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
// warp_lock <= {`NUM_WARPS{1'b0}};
|
||||
end
|
||||
|
@ -251,7 +251,7 @@ module VX_warp_sched (
|
|||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
|
||||
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
|
||||
|
@ -305,7 +305,7 @@ module VX_warp_sched (
|
|||
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock)) : visible_active;
|
||||
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
|
||||
|
||||
// Choosing a warp to schedule
|
||||
VX_priority_encoder #(
|
||||
|
|
|
@ -51,8 +51,8 @@ module Vortex #(
|
|||
// I/O request
|
||||
output wire io_req_valid,
|
||||
output wire io_req_rw,
|
||||
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
|
||||
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
|
||||
output wire[3:0] io_req_byteen,
|
||||
output wire[29:0] io_req_addr,
|
||||
output wire[31:0] io_req_data,
|
||||
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
|
||||
input wire io_req_ready,
|
||||
|
|
|
@ -37,8 +37,8 @@ module Vortex_Cluster #(
|
|||
// I/O request
|
||||
output wire io_req_valid,
|
||||
output wire io_req_rw,
|
||||
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
|
||||
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
|
||||
output wire[3:0] io_req_byteen,
|
||||
output wire[29:0] io_req_addr,
|
||||
output wire[31:0] io_req_data,
|
||||
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
|
||||
input wire io_req_ready,
|
||||
|
@ -91,8 +91,8 @@ module Vortex_Cluster #(
|
|||
`IGNORE_WARNINGS_BEGIN
|
||||
wire[`NUM_CORES-1:0] per_core_io_req_valid;
|
||||
wire[`NUM_CORES-1:0] per_core_io_req_rw;
|
||||
wire[`NUM_CORES-1:0][`DCORE_BYTEEN_WIDTH-1:0] per_core_io_req_byteen;
|
||||
wire[`NUM_CORES-1:0][`DCORE_ADDR_WIDTH-1:0] per_core_io_req_addr;
|
||||
wire[`NUM_CORES-1:0][3:0] per_core_io_req_byteen;
|
||||
wire[`NUM_CORES-1:0][29:0] per_core_io_req_addr;
|
||||
wire[`NUM_CORES-1:0][31:0] per_core_io_req_data;
|
||||
wire[`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag;
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ module Vortex_Socket (
|
|||
// I/O request
|
||||
output wire io_req_valid,
|
||||
output wire io_req_rw,
|
||||
output wire[`DCORE_BYTEEN_WIDTH-1:0] io_req_byteen,
|
||||
output wire[`DCORE_ADDR_WIDTH-1:0] io_req_addr,
|
||||
output wire[3:0] io_req_byteen,
|
||||
output wire[29:0] io_req_addr,
|
||||
output wire[31:0] io_req_data,
|
||||
output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag,
|
||||
input wire io_req_ready,
|
||||
|
@ -125,8 +125,8 @@ module Vortex_Socket (
|
|||
`IGNORE_WARNINGS_BEGIN
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
|
||||
wire[`NUM_CLUSTERS-1:0][`DCORE_BYTEEN_WIDTH-1:0] per_cluster_io_req_byteen;
|
||||
wire[`NUM_CLUSTERS-1:0][`DCORE_ADDR_WIDTH-1:0] per_cluster_io_req_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
|
||||
wire[`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
|
||||
wire[`NUM_CLUSTERS-1:0][`DCORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
|
||||
|
||||
|
|
90
hw/rtl/cache/VX_snp_forwarder.v
vendored
90
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -33,67 +33,65 @@ module VX_snp_forwarder #(
|
|||
input wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
|
||||
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
|
||||
);
|
||||
reg [`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH-1:0] pending_reqs [SNRQ_SIZE-1:0];
|
||||
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
||||
reg [`LOG2UP(SNRQ_SIZE):0] rd_ptr, wr_ptr;
|
||||
reg [`REQS_BITS-1:0] fwdin_sel;
|
||||
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] rd_a, wr_a;
|
||||
|
||||
wire enqueue, dequeue, empty, full;
|
||||
|
||||
wire fwdout_ready;
|
||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
|
||||
wire sfq_push, sfq_pop, sfq_full;
|
||||
|
||||
wire fwdin_valid;
|
||||
wire [SNP_FWD_TAG_WIDTH-1:0] fwdin_tag;
|
||||
wire fwdin_ready;
|
||||
wire fwdin_taken;
|
||||
|
||||
assign fwdout_ready = (& snp_fwdout_ready);
|
||||
wire fwdin_ready = snp_rsp_ready;
|
||||
wire fwdin_taken = fwdin_valid && fwdin_ready;
|
||||
|
||||
assign snp_req_ready = !full && fwdout_ready;
|
||||
wire fwdout_ready = (& snp_fwdout_ready);
|
||||
|
||||
assign rd_a = rd_ptr[`LOG2UP(SNRQ_SIZE)-1:0];
|
||||
assign wr_a = wr_ptr[`LOG2UP(SNRQ_SIZE)-1:0];
|
||||
assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[sfq_read_addr]); // send response
|
||||
|
||||
assign sfq_read_addr = fwdin_tag[`LOG2UP(SNRQ_SIZE)-1:0];
|
||||
|
||||
assign sfq_push = snp_req_valid && fwdout_ready;
|
||||
assign sfq_pop = snp_rsp_valid;
|
||||
|
||||
VX_indexable_queue #(
|
||||
.DATAW (`LOG2UP(SNRQ_SIZE) + `DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
|
||||
.SIZE (SNRQ_SIZE)
|
||||
) snp_fwd_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({sfq_write_addr, snp_req_addr, snp_req_tag}),
|
||||
.write_addr (sfq_write_addr),
|
||||
.push (sfq_push),
|
||||
.full (sfq_full),
|
||||
.pop (sfq_pop),
|
||||
.read_addr (sfq_read_addr),
|
||||
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_tag})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
//--
|
||||
end else begin
|
||||
if (sfq_push) begin
|
||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
|
||||
end
|
||||
if (fwdin_taken) begin
|
||||
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
|
||||
assert(sfq_read_addr == dbg_sfq_write_addr);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign snp_fwdout_valid[i] = enqueue && fwdout_ready;
|
||||
assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full;
|
||||
assign snp_fwdout_addr[i] = snp_req_addr;
|
||||
assign snp_fwdout_tag[i] = wr_a;
|
||||
assign snp_fwdout_tag[i] = sfq_write_addr;
|
||||
end
|
||||
|
||||
assign fwdin_ready = snp_rsp_ready;
|
||||
assign fwdin_taken = fwdin_valid && fwdin_ready;
|
||||
|
||||
assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[fwdin_tag]); // send response
|
||||
assign {snp_rsp_addr, snp_rsp_tag} = pending_reqs[fwdin_tag];
|
||||
|
||||
assign empty = (wr_ptr == rd_ptr);
|
||||
assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SNRQ_SIZE)] != rd_ptr[`LOG2UP(SNRQ_SIZE)]);
|
||||
|
||||
assign enqueue = snp_req_valid && snp_req_ready;
|
||||
assign dequeue = !empty && (0 == pending_cntrs[rd_a]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= 0;
|
||||
wr_ptr <= 0;
|
||||
end else begin
|
||||
if (enqueue) begin
|
||||
pending_reqs[wr_a] <= {snp_req_addr, snp_req_tag};
|
||||
pending_cntrs[wr_a] <= NUM_REQUESTS;
|
||||
wr_ptr <= wr_ptr + 1;
|
||||
end
|
||||
if (dequeue) begin
|
||||
rd_ptr <= rd_ptr + 1;
|
||||
end
|
||||
if (fwdin_taken) begin
|
||||
pending_cntrs[fwdin_tag] <= pending_cntrs[fwdin_tag] - 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign snp_req_ready = !sfq_full && fwdout_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -104,7 +102,7 @@ module VX_snp_forwarder #(
|
|||
end
|
||||
|
||||
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
|
||||
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
|
||||
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
|
||||
|
||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
||||
|
|
|
@ -204,7 +204,7 @@ void Simulator::dbus_driver() {
|
|||
void Simulator::io_driver() {
|
||||
if (vortex_->io_req_valid
|
||||
&& vortex_->io_req_rw
|
||||
&& vortex_->io_req_addr == IO_BUS_ADDR_COUT) {
|
||||
&& ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) {
|
||||
uint32_t data_write = (uint32_t)vortex_->io_req_data;
|
||||
char c = (char)data_write;
|
||||
std::cout << c;
|
||||
|
|
|
@ -23,7 +23,7 @@ VX_MAIN = vx_simple_main
|
|||
|
||||
VX_SRCS = vx_simple_main.c tests.c
|
||||
|
||||
all: HEX DUMP ELF
|
||||
all: HEX DUMP ELF BIN
|
||||
|
||||
DUMP: ELF
|
||||
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
|
||||
|
@ -31,5 +31,8 @@ DUMP: ELF
|
|||
HEX: ELF
|
||||
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
|
||||
|
||||
BIN: ELF
|
||||
$(CPY) -O binary $(VX_MAIN).elf $(VX_MAIN).bin
|
||||
|
||||
ELF:
|
||||
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
|
@ -50,7 +50,7 @@ void mat_add_kernel(void * void_arguments)
|
|||
|
||||
int main()
|
||||
{
|
||||
/*// ensure single thread
|
||||
// ensure single thread
|
||||
vx_tmc(1);
|
||||
|
||||
vx_print_str("Let's start... (This might take a while)\n");
|
||||
|
@ -87,7 +87,7 @@ int main()
|
|||
vx_print_str("Simple Main\n");
|
||||
|
||||
// TMC test
|
||||
test_tmc();*/
|
||||
test_tmc();
|
||||
|
||||
// Control Divergence Test
|
||||
vx_print_str("test_divergence\n");
|
||||
|
@ -95,7 +95,6 @@ int main()
|
|||
test_divergence();
|
||||
vx_tmc(1);
|
||||
|
||||
/*
|
||||
// Test wspawn
|
||||
vx_print_str("test_wspawn\n");
|
||||
test_wsapwn();
|
||||
|
@ -143,7 +142,7 @@ int main()
|
|||
vx_print_str(" ");
|
||||
}
|
||||
vx_print_str("\n");
|
||||
}*/
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue