mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
pipeline refactoring
This commit is contained in:
parent
e2100e9e87
commit
dc7efbcfb4
31 changed files with 1437 additions and 6038 deletions
|
@ -89,7 +89,7 @@ module VX_alu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + 1 + 32)
|
||||
) rsp_reg (
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_pipe #(
|
||||
module VX_csr_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
|
@ -235,10 +235,10 @@ module VX_decode #(
|
|||
is_ltype ? `WB_MEM :
|
||||
`WB_NO;
|
||||
|
||||
assign join_if.is_join = is_gpu && (gpu_op == `GPU_JOIN) && in_valid;
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
assign wstall_if.wstall = (is_br || is_gpu) && in_valid;
|
||||
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
||||
|
|
|
@ -61,9 +61,9 @@ module VX_execute #(
|
|||
.lsu_commit_if (lsu_commit_if)
|
||||
);
|
||||
|
||||
VX_csr_pipe #(
|
||||
VX_csr_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_pipe (
|
||||
) csr_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
|
|
|
@ -1,77 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_mux (
    // inputs: the scheduled instruction and its per-thread register operands
    VX_execute_if   execute_if,
    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,

    // outputs: one request interface per execution unit
    VX_alu_req_if   alu_req_if,
    VX_lsu_req_if   lsu_req_if,
    VX_csr_req_if   csr_req_if,
    VX_mul_req_if   mul_req_if,
    VX_gpu_req_if   gpu_req_if
);

    // Purely combinational fan-out: the instruction fields and operands are
    // broadcast to every unit's request interface, and only the per-thread
    // valid bits are gated by execute_if.ex_type so that a single unit sees
    // the request.

    // one select per execution unit, decoded from ex_type
    wire sel_alu = (execute_if.ex_type == `EX_ALU);
    wire sel_lsu = (execute_if.ex_type == `EX_LSU);
    wire sel_csr = (execute_if.ex_type == `EX_CSR);
    wire sel_mul = (execute_if.ex_type == `EX_MUL);
    wire sel_gpu = (execute_if.ex_type == `EX_GPU);

    // per-thread valid bits, masked by the unit select
    assign alu_req_if.valid = execute_if.valid & {`NUM_THREADS{sel_alu}};
    assign lsu_req_if.valid = execute_if.valid & {`NUM_THREADS{sel_lsu}};
    assign csr_req_if.valid = execute_if.valid & {`NUM_THREADS{sel_csr}};
    assign mul_req_if.valid = execute_if.valid & {`NUM_THREADS{sel_mul}};
    assign gpu_req_if.valid = execute_if.valid & {`NUM_THREADS{sel_gpu}};

    // ALU request
    assign alu_req_if.warp_num   = execute_if.warp_num;
    assign alu_req_if.curr_PC    = execute_if.curr_PC;
    assign alu_req_if.next_PC    = execute_if.next_PC;
    assign alu_req_if.alu_op     = `ALU_OP(execute_if.instr_op);
    assign alu_req_if.offset     = execute_if.imm;
    assign alu_req_if.rs1_data   = rs1_data;
    assign alu_req_if.rs2_data   = rs2_data;
    assign alu_req_if.rd         = execute_if.rd;
    assign alu_req_if.wb         = execute_if.wb;

    // LSU request: rs1 supplies the base address, rs2 the store data
    assign lsu_req_if.warp_num   = execute_if.warp_num;
    assign lsu_req_if.curr_PC    = execute_if.curr_PC;
    assign lsu_req_if.rw         = `LSU_RW(execute_if.instr_op);
    assign lsu_req_if.byteen     = `LSU_BE(execute_if.instr_op);
    assign lsu_req_if.base_addr  = rs1_data;
    assign lsu_req_if.offset     = execute_if.imm;
    assign lsu_req_if.store_data = rs2_data;
    assign lsu_req_if.rd         = execute_if.rd;
    assign lsu_req_if.wb         = execute_if.wb;

    // CSR request: the address comes from the low bits of the immediate; the
    // mask is the zero-extended rs1 field when rs2_is_imm is set, otherwise
    // thread 0's rs1 value
    assign csr_req_if.warp_num   = execute_if.warp_num;
    assign csr_req_if.curr_PC    = execute_if.curr_PC;
    assign csr_req_if.csr_op     = `CSR_OP(execute_if.instr_op);
    assign csr_req_if.csr_addr   = execute_if.imm[`CSR_ADDR_SIZE-1:0];
    assign csr_req_if.csr_mask   = execute_if.rs2_is_imm ? 32'(execute_if.rs1) : rs1_data[0];
    assign csr_req_if.is_io      = 1'b0;
    assign csr_req_if.rd         = execute_if.rd;
    assign csr_req_if.wb         = execute_if.wb;

    // MUL request
    assign mul_req_if.warp_num   = execute_if.warp_num;
    assign mul_req_if.curr_PC    = execute_if.curr_PC;
    assign mul_req_if.mul_op     = `MUL_OP(execute_if.instr_op);
    assign mul_req_if.rs1_data   = rs1_data;
    assign mul_req_if.rs2_data   = rs2_data;
    assign mul_req_if.rd         = execute_if.rd;
    assign mul_req_if.wb         = execute_if.wb;

    // GPU request: all threads' rs1 values, but only thread 0's rs2 value
    assign gpu_req_if.warp_num   = execute_if.warp_num;
    assign gpu_req_if.curr_PC    = execute_if.curr_PC;
    assign gpu_req_if.next_PC    = execute_if.next_PC;
    assign gpu_req_if.gpu_op     = `GPU_OP(execute_if.instr_op);
    assign gpu_req_if.rs1_data   = rs1_data;
    assign gpu_req_if.rs2_data   = rs2_data[0];

endmodule
|
|
@ -36,6 +36,7 @@ module VX_gpr_ram (
|
|||
end
|
||||
end
|
||||
assert(~(|we) || (waddr != 0)); // ensure r0 is never written!
|
||||
assert(0 == ram[0]);
|
||||
end
|
||||
|
||||
assign rs1_data = ram[rs1];
|
||||
|
|
|
@ -4,23 +4,16 @@ module VX_gpr_stage #(
|
|||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_wb_if writeback_if,
|
||||
VX_execute_if execute_if,
|
||||
VX_decode_if decode_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
VX_gpr_data_if gpr_data_if
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_imm;
|
||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||
|
@ -28,128 +21,27 @@ module VX_gpr_stage #(
|
|||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign rs1_PC[i] = execute_if.curr_PC;
|
||||
assign rs2_imm[i] = execute_if.imm;
|
||||
assign rs1_PC[i] = decode_if.curr_PC;
|
||||
assign rs2_imm[i] = decode_if.imm;
|
||||
end
|
||||
|
||||
assign rs1_data = execute_if.rs1_is_PC ? rs1_PC : rs1_data_all[execute_if.warp_num];
|
||||
assign rs2_data = execute_if.rs2_is_imm ? rs2_imm : rs2_data_all[execute_if.warp_num];
|
||||
|
||||
generate
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (execute_if.rs1),
|
||||
.rs2 (execute_if.rs2),
|
||||
.rs1_data (rs1_data_all[i]),
|
||||
.rs2_data (rs2_data_all[i])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_gpr_mux gpr_mux (
|
||||
.execute_if (execute_if),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
wire stall_alu = ~alu_req_if.ready && (| alu_req_if.valid);
|
||||
wire stall_lsu = ~lsu_req_if.ready && (| lsu_req_if.valid);
|
||||
wire stall_csr = ~csr_req_if.ready && (| csr_req_if.valid);
|
||||
wire stall_mul = ~mul_req_if.ready && (| mul_req_if.valid);
|
||||
wire stall_gpu = ~gpu_req_if.ready && (| gpu_req_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `NR_BITS + `WB_BITS)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_alu),
|
||||
.flush (0),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.rd, alu_req_tmp_if.wb}),
|
||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.rd, alu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 1 + `BYTEEN_BITS + `NR_BITS + `WB_BITS)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (0),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.store_data, lsu_req_tmp_if.offset, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.base_addr, lsu_req_if.store_data, lsu_req_if.offset, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.rd, lsu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `CSR_ADDR_SIZE + 32 + 1 + `NR_BITS + `WB_BITS)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_csr),
|
||||
.flush (0),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io, csr_req_tmp_if.rd, csr_req_tmp_if.wb}),
|
||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io, csr_req_if.rd, csr_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `NR_BITS + `WB_BITS)
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_mul),
|
||||
.flush (0),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data, mul_req_tmp_if.rd, mul_req_tmp_if.wb}),
|
||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.rs1_data, mul_req_if.rs2_data, mul_req_if.rd, mul_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpu),
|
||||
.flush (0),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.next_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.next_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data})
|
||||
);
|
||||
|
||||
assign execute_if.alu_ready = ~stall_alu;
|
||||
assign execute_if.lsu_ready = ~stall_lsu;
|
||||
assign execute_if.csr_ready = ~stall_csr;
|
||||
assign execute_if.mul_ready = ~stall_mul;
|
||||
assign execute_if.gpu_ready = ~stall_gpu;
|
||||
assign gpr_data_if.rs1_data = decode_if.rs1_is_PC ? rs1_PC : rs1_data_all[decode_if.warp_num];
|
||||
assign gpr_data_if.rs2_data = decode_if.rs2_is_imm ? rs2_imm : rs2_data_all[decode_if.warp_num];
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (decode_if.rs1),
|
||||
.rs2 (decode_if.rs2),
|
||||
.rs1_data (rs1_data_all[i]),
|
||||
.rs2_data (rs2_data_all[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| execute_if.valid)) begin
|
||||
$display("%t: Core%0d-GPR: warp=%0d, PC=%0h, a=%0h, b=%0h", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, rs1_data, rs2_data);
|
||||
|
||||
// the scheduler guarantees that the destination execute unit is ready
|
||||
assert((execute_if.ex_type != `EX_ALU) || alu_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_LSU) || lsu_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_CSR) || csr_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_MUL) || mul_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_GPU) || gpu_req_if.ready);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -16,12 +16,11 @@ module VX_gpu_unit #(
|
|||
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < gpu_req_if.rs1_data[0];
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_req_if.rs1_data[0];
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
|
||||
wire valid_inst = (| curr_valids);
|
||||
|
@ -35,11 +34,10 @@ module VX_gpu_unit #(
|
|||
|
||||
wire wspawn = is_wspawn && valid_inst;
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire all_active = `NUM_WARPS < gpu_req_if.rs1_data[0];
|
||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_req_if.rs1_data[0];
|
||||
assign wspawn_new_active[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
|
||||
assign warp_ctl_if.is_barrier = is_bar && valid_inst;
|
||||
|
@ -75,12 +73,14 @@ module VX_gpu_unit #(
|
|||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
||||
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
||||
|
||||
assign gpu_req_if.ready = 1'b1; // has no stalls
|
||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||
|
||||
// commit
|
||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
|
||||
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
|
||||
assign gpu_commit_if.wb = `WB_NO;
|
||||
assign gpu_commit_if.rd = 0;
|
||||
assign gpu_commit_if.data = 0;
|
||||
|
||||
endmodule
|
|
@ -15,7 +15,14 @@ module VX_issue #(
|
|||
VX_mul_req_if mul_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
VX_execute_if execute_if();
|
||||
VX_gpr_data_if gpr_data_if();
|
||||
wire schedule_delay;
|
||||
|
||||
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/;
|
||||
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/;
|
||||
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/;
|
||||
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/;
|
||||
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/;
|
||||
|
||||
VX_scheduler #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -23,25 +30,126 @@ module VX_issue #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.execute_if (execute_if),
|
||||
.writeback_if (writeback_if),
|
||||
.alu_busy (alu_busy),
|
||||
.lsu_busy (lsu_busy),
|
||||
.csr_busy (csr_busy),
|
||||
.mul_busy (mul_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.schedule_delay (schedule_delay),
|
||||
`UNUSED_PIN (is_empty)
|
||||
);
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.execute_if (execute_if),
|
||||
.clk (clk),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
.gpr_data_if (gpr_data_if)
|
||||
);
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_issue_mux issue_mux (
|
||||
.decode_if (decode_if),
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
wire stall_alu = ~alu_req_if.ready || schedule_delay;
|
||||
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
|
||||
wire stall_csr = ~csr_req_if.ready || schedule_delay;
|
||||
wire stall_mul = ~mul_req_if.ready || schedule_delay;
|
||||
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
|
||||
|
||||
wire flush_alu = alu_req_if.ready && schedule_delay;
|
||||
wire flush_lsu = lsu_req_if.ready && schedule_delay;
|
||||
wire flush_csr = csr_req_if.ready && schedule_delay;
|
||||
wire flush_mul = mul_req_if.ready && schedule_delay;
|
||||
wire flush_gpu = gpu_req_if.ready && schedule_delay;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_alu),
|
||||
.flush (flush_alu),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.wb, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (flush_lsu),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.wb, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_csr),
|
||||
.flush (flush_csr),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_mul),
|
||||
.flush (flush_mul),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpu),
|
||||
.flush (flush_gpu),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| alu_req_tmp_if.valid) && ~stall_alu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC);
|
||||
end
|
||||
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
|
||||
end
|
||||
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
|
||||
// trace the LSU request being issued; arguments follow the order of the format fields (wb before rd)
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
|
||||
end
|
||||
if ((| csr_req_tmp_if.valid) && ~stall_csr) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, op=%0d, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask);
|
||||
end
|
||||
if ((| gpu_req_tmp_if.valid) && ~stall_gpu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, op=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
76
hw/rtl/VX_issue_mux.v
Normal file
76
hw/rtl/VX_issue_mux.v
Normal file
|
@ -0,0 +1,76 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_mux (
    // inputs: the decoded instruction and the operand data fetched for it
    VX_decode_if    decode_if,
    VX_gpr_data_if  gpr_data_if,

    // outputs: one request interface per execution unit
    VX_alu_req_if   alu_req_if,
    VX_lsu_req_if   lsu_req_if,
    VX_csr_req_if   csr_req_if,
    VX_mul_req_if   mul_req_if,
    VX_gpu_req_if   gpu_req_if
);

    // Purely combinational fan-out: the decoded fields and operand data are
    // broadcast to every unit's request interface, and only the per-thread
    // valid bits are gated by decode_if.ex_type so that a single unit sees
    // the request.

    // one select per execution unit, decoded from ex_type
    wire sel_alu = (decode_if.ex_type == `EX_ALU);
    wire sel_lsu = (decode_if.ex_type == `EX_LSU);
    wire sel_csr = (decode_if.ex_type == `EX_CSR);
    wire sel_mul = (decode_if.ex_type == `EX_MUL);
    wire sel_gpu = (decode_if.ex_type == `EX_GPU);

    // per-thread valid bits, masked by the unit select
    assign alu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_alu}};
    assign lsu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_lsu}};
    assign csr_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_csr}};
    assign mul_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_mul}};
    assign gpu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_gpu}};

    // ALU request
    assign alu_req_if.warp_num   = decode_if.warp_num;
    assign alu_req_if.curr_PC    = decode_if.curr_PC;
    assign alu_req_if.next_PC    = decode_if.next_PC;
    assign alu_req_if.alu_op     = `ALU_OP(decode_if.instr_op);
    assign alu_req_if.offset     = decode_if.imm;
    assign alu_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign alu_req_if.rs2_data   = gpr_data_if.rs2_data;
    assign alu_req_if.rd         = decode_if.rd;
    assign alu_req_if.wb         = decode_if.wb;

    // LSU request: rs1 supplies the base address, rs2 the store data
    assign lsu_req_if.warp_num   = decode_if.warp_num;
    assign lsu_req_if.curr_PC    = decode_if.curr_PC;
    assign lsu_req_if.rw         = `LSU_RW(decode_if.instr_op);
    assign lsu_req_if.byteen     = `LSU_BE(decode_if.instr_op);
    assign lsu_req_if.base_addr  = gpr_data_if.rs1_data;
    assign lsu_req_if.offset     = decode_if.imm;
    assign lsu_req_if.store_data = gpr_data_if.rs2_data;
    assign lsu_req_if.rd         = decode_if.rd;
    assign lsu_req_if.wb         = decode_if.wb;

    // CSR request: the address comes from the low bits of the immediate; the
    // mask is the zero-extended rs1 field when rs2_is_imm is set, otherwise
    // element 0 of the rs1 operand data (presumably thread 0 - confirm
    // against the VX_gpr_data_if declaration)
    assign csr_req_if.warp_num   = decode_if.warp_num;
    assign csr_req_if.curr_PC    = decode_if.curr_PC;
    assign csr_req_if.csr_op     = `CSR_OP(decode_if.instr_op);
    assign csr_req_if.csr_addr   = decode_if.imm[`CSR_ADDR_SIZE-1:0];
    assign csr_req_if.csr_mask   = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_data_if.rs1_data[0];
    assign csr_req_if.is_io      = 1'b0;
    assign csr_req_if.rd         = decode_if.rd;
    assign csr_req_if.wb         = decode_if.wb;

    // MUL request
    assign mul_req_if.warp_num   = decode_if.warp_num;
    assign mul_req_if.curr_PC    = decode_if.curr_PC;
    assign mul_req_if.mul_op     = `MUL_OP(decode_if.instr_op);
    assign mul_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign mul_req_if.rs2_data   = gpr_data_if.rs2_data;
    assign mul_req_if.rd         = decode_if.rd;
    assign mul_req_if.wb         = decode_if.wb;

    // GPU request: the full rs1 operand data, but only element 0 of rs2
    assign gpu_req_if.warp_num   = decode_if.warp_num;
    assign gpu_req_if.curr_PC    = decode_if.curr_PC;
    assign gpu_req_if.next_PC    = decode_if.next_PC;
    assign gpu_req_if.gpu_op     = `GPU_OP(decode_if.instr_op);
    assign gpu_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign gpu_req_if.rs2_data   = gpr_data_if.rs2_data[0];

endmodule
|
|
@ -107,13 +107,15 @@ module VX_mul_unit #(
|
|||
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
||||
|| pipeline_stall;
|
||||
|
||||
wire flush = mul_commit_if.ready && pipeline_stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.flush (flush),
|
||||
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
|
||||
.out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
);
|
||||
|
|
|
@ -8,8 +8,12 @@ module VX_scheduler #(
|
|||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
VX_execute_if execute_if,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire gpu_busy,
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
|
||||
|
@ -28,13 +32,13 @@ module VX_scheduler #(
|
|||
wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
|
||||
|
||||
wire ex_stalled = (| decode_if.valid)
|
||||
&& ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
|
||||
|| (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
|
||||
|| (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
|
||||
|| (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
|
||||
|| (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));
|
||||
&& ((alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
|
||||
|
||||
wire stall = rename_valid || ex_stalled;
|
||||
wire stall = ex_stalled || rename_valid;
|
||||
|
||||
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
|
||||
|
||||
|
@ -67,19 +71,18 @@ module VX_scheduler #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS),
|
||||
) schedule_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.instr_op, decode_if.wb}),
|
||||
.out ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
|
||||
assign schedule_delay = stall;
|
||||
|
||||
assign is_empty = (0 == count_valid);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, gpu_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -1,69 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
|
||||
module VX_warp (
    input  wire                    clk,
    input  wire                    reset,
    input  wire                    stall,
    input  wire                    remove,
    input  wire[`NUM_THREADS-1:0]  thread_mask,
    input  wire                    change_mask,
    input  wire                    jal,
    input  wire[31:0]              dest,
    input  wire                    branch_taken,
    input  wire[31:0]              branch_dest,
    input  wire                    wspawn,
    input  wire[31:0]              wspawn_pc,

    output wire[31:0]              PC,
    output wire[`NUM_THREADS-1:0]  valid
);

    // Per-warp state: a registered thread mask and a registered PC, plus the
    // combinational PC selection that is presented on the PC output.

    reg [`NUM_THREADS-1:0] mask_r;    // stored thread mask
    reg [31:0]             pc_r;      // stored PC
    reg [31:0]             pc_sel;    // PC after the jump/branch override

    // PC selection: the stored PC by default, replaced by the branch target
    // when branch_taken is set; a jal target has the highest priority, so it
    // is applied last.
    always @(*) begin
        pc_sel = pc_r;
        if (branch_taken) begin
            pc_sel = branch_dest;
        end
        if (jal == 1'b1) begin
            pc_sel = dest;
        end
    end

    assign PC = pc_sel;

    // A mask change is forwarded in the same cycle; otherwise a stall blanks
    // every thread, and without a stall the stored mask is driven out.
    assign valid = change_mask ? thread_mask
                               : (stall ? {`NUM_THREADS{1'b0}} : mask_r);

    always @(posedge clk) begin
        // thread mask: reset leaves only bit 0 set, remove clears all threads,
        // change_mask loads the new mask
        if (reset) begin
            mask_r <= {{(`NUM_THREADS-1){1'b0}},1'b1};
        end else if (remove) begin
            mask_r <= 0;
        end else if (change_mask) begin
            mask_r <= thread_mask;
        end

        // PC register: wspawn loads the spawn PC; otherwise the selected PC
        // advances by 4 when not stalled and is held as-is when stalled
        if (reset) begin
            pc_r <= 0;
        end else if (wspawn) begin
            pc_r <= wspawn_pc;
        end else if (!stall) begin
            pc_r <= pc_sel + 32'h4;
        end else begin
            pc_r <= pc_sel;
        end
    end

endmodule
|
|
@ -20,7 +20,7 @@ module VX_warp_sched #(
|
|||
wire update_visible_active;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire [31:0] join_pc;
|
||||
|
@ -71,9 +71,8 @@ module VX_warp_sched #(
|
|||
|
||||
wire stall;
|
||||
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
integer i;
|
||||
if (reset) begin
|
||||
for (i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
barrier_stall_mask[i] <= 0;
|
||||
|
@ -99,9 +98,9 @@ module VX_warp_sched #(
|
|||
end else begin
|
||||
|
||||
if (warp_ctl_if.wspawn) begin
|
||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'(1));
|
||||
end
|
||||
|
||||
if (warp_ctl_if.is_barrier) begin
|
||||
|
@ -205,13 +204,12 @@ module VX_warp_sched #(
|
|||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split_save_pc, warp_ctl_if.split_later_mask};
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = d[join_if.warp_num];
|
||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.warp_num];
|
||||
|
||||
genvar j;
|
||||
|
||||
for (j = 0; j < `NUM_WARPS; j++) begin : stacks
|
||||
wire correct_warp_s = (j == warp_ctl_if.warp_num);
|
||||
wire correct_warp_j = (j == join_if.warp_num);
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
|
||||
wire correct_warp_s = (i == warp_ctl_if.warp_num);
|
||||
wire correct_warp_j = (i == join_if.warp_num);
|
||||
|
||||
wire push = (warp_ctl_if.is_split && warp_ctl_if.do_split) && correct_warp_s;
|
||||
wire pop = join_if.is_join && correct_warp_j;
|
||||
|
@ -224,11 +222,11 @@ module VX_warp_sched #(
|
|||
.reset(reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.d (d[i]),
|
||||
.d (ipdom[i]),
|
||||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num));
|
||||
|
||||
|
|
328
hw/rtl/cache/VX_cache.v
vendored
328
hw/rtl/cache/VX_cache.v
vendored
|
@ -249,187 +249,185 @@ module VX_cache #(
|
|||
|
||||
genvar i;
|
||||
|
||||
generate
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
|
||||
|
||||
wire curr_bank_core_rsp_valid;
|
||||
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
|
||||
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire curr_bank_core_rsp_ready;
|
||||
wire curr_bank_core_rsp_valid;
|
||||
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
|
||||
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire curr_bank_core_rsp_ready;
|
||||
|
||||
wire curr_bank_dram_fill_rsp_valid;
|
||||
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
|
||||
wire curr_bank_dram_fill_rsp_ready;
|
||||
wire curr_bank_dram_fill_rsp_valid;
|
||||
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
|
||||
wire curr_bank_dram_fill_rsp_ready;
|
||||
|
||||
wire curr_bank_dram_fill_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
|
||||
wire curr_bank_dram_fill_req_ready;
|
||||
wire curr_bank_dram_fill_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
|
||||
wire curr_bank_dram_fill_req_ready;
|
||||
|
||||
wire curr_bank_dram_wb_req_valid;
|
||||
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
|
||||
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
|
||||
wire curr_bank_dram_wb_req_ready;
|
||||
wire curr_bank_dram_wb_req_valid;
|
||||
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
|
||||
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
|
||||
wire curr_bank_dram_wb_req_ready;
|
||||
|
||||
wire curr_bank_snp_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
|
||||
wire curr_bank_snp_req_invalidate;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
|
||||
wire curr_bank_snp_req_ready;
|
||||
wire curr_bank_snp_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
|
||||
wire curr_bank_snp_req_invalidate;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
|
||||
wire curr_bank_snp_req_ready;
|
||||
|
||||
wire curr_bank_snp_rsp_valid;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
|
||||
wire curr_bank_snp_rsp_ready;
|
||||
wire curr_bank_snp_rsp_valid;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
|
||||
wire curr_bank_snp_rsp_ready;
|
||||
|
||||
wire curr_bank_core_req_ready;
|
||||
wire curr_bank_core_req_ready;
|
||||
|
||||
// Core Req
|
||||
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}});
|
||||
assign curr_bank_core_req_addr = core_req_addr;
|
||||
assign curr_bank_core_req_rw = core_req_rw;
|
||||
assign curr_bank_core_req_byteen = core_req_byteen;
|
||||
assign curr_bank_core_req_data = core_req_data;
|
||||
assign curr_bank_core_req_tag = core_req_tag;
|
||||
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
|
||||
// Core Req
|
||||
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}});
|
||||
assign curr_bank_core_req_addr = core_req_addr;
|
||||
assign curr_bank_core_req_rw = core_req_rw;
|
||||
assign curr_bank_core_req_byteen = core_req_byteen;
|
||||
assign curr_bank_core_req_data = core_req_data;
|
||||
assign curr_bank_core_req_tag = core_req_tag;
|
||||
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
|
||||
|
||||
// Core WB
|
||||
assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
|
||||
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
|
||||
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
|
||||
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
|
||||
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
|
||||
// Core WB
|
||||
assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
|
||||
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
|
||||
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
|
||||
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
|
||||
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
|
||||
|
||||
// Dram fill request
|
||||
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
|
||||
end
|
||||
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
|
||||
// Dram fill request
|
||||
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
|
||||
end
|
||||
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
|
||||
|
||||
// Dram fill response
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid;
|
||||
assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag;
|
||||
end else begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
|
||||
assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
|
||||
end
|
||||
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
|
||||
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
|
||||
|
||||
// Dram writeback request
|
||||
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
|
||||
assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
|
||||
end
|
||||
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
|
||||
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
|
||||
|
||||
// Snoop request
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual;
|
||||
assign curr_bank_snp_req_addr = snp_req_addr_qual;
|
||||
end else begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
|
||||
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
|
||||
end
|
||||
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
|
||||
assign curr_bank_snp_req_tag = snp_req_tag_qual;
|
||||
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
|
||||
|
||||
// Snoop response
|
||||
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
|
||||
assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
|
||||
assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
|
||||
|
||||
VX_bank #(
|
||||
.BANK_ID (i),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.DRAM_ENABLE (DRAM_ENABLE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.SNOOP_FORWARDING (SNOOP_FORWARDING),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
) bank (
|
||||
`SCOPE_SIGNALS_CACHE_BANK_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Core request
|
||||
.core_req_valid (curr_bank_core_req_valid),
|
||||
.core_req_rw (curr_bank_core_req_rw),
|
||||
.core_req_byteen (curr_bank_core_req_byteen),
|
||||
.core_req_addr (curr_bank_core_req_addr),
|
||||
.core_req_data (curr_bank_core_req_data),
|
||||
.core_req_tag (curr_bank_core_req_tag),
|
||||
.core_req_ready (curr_bank_core_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (curr_bank_core_rsp_valid),
|
||||
.core_rsp_tid (curr_bank_core_rsp_tid),
|
||||
.core_rsp_data (curr_bank_core_rsp_data),
|
||||
.core_rsp_tag (curr_bank_core_rsp_tag),
|
||||
.core_rsp_ready (curr_bank_core_rsp_ready),
|
||||
|
||||
// Dram fill request
|
||||
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
|
||||
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
|
||||
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
|
||||
|
||||
// Dram fill response
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid;
|
||||
assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag;
|
||||
end else begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
|
||||
assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
|
||||
end
|
||||
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
|
||||
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
|
||||
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
|
||||
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
|
||||
.dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready),
|
||||
|
||||
// Dram writeback request
|
||||
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
|
||||
assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
|
||||
end
|
||||
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
|
||||
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
|
||||
// Dram writeback request
|
||||
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
|
||||
.dram_wb_req_byteen (curr_bank_dram_wb_req_byteen),
|
||||
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
|
||||
.dram_wb_req_data (curr_bank_dram_wb_req_data),
|
||||
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
|
||||
|
||||
// Snoop request
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual;
|
||||
assign curr_bank_snp_req_addr = snp_req_addr_qual;
|
||||
end else begin
|
||||
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
|
||||
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
|
||||
end
|
||||
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
|
||||
assign curr_bank_snp_req_tag = snp_req_tag_qual;
|
||||
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
|
||||
.snp_req_valid (curr_bank_snp_req_valid),
|
||||
.snp_req_addr (curr_bank_snp_req_addr),
|
||||
.snp_req_invalidate (curr_bank_snp_req_invalidate),
|
||||
.snp_req_tag (curr_bank_snp_req_tag),
|
||||
.snp_req_ready (curr_bank_snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
|
||||
assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
|
||||
assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
|
||||
|
||||
VX_bank #(
|
||||
.BANK_ID (i),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.STAGE_1_CYCLES (STAGE_1_CYCLES),
|
||||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.DRAM_ENABLE (DRAM_ENABLE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.SNOOP_FORWARDING (SNOOP_FORWARDING),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
|
||||
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
|
||||
) bank (
|
||||
`SCOPE_SIGNALS_CACHE_BANK_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Core request
|
||||
.core_req_valid (curr_bank_core_req_valid),
|
||||
.core_req_rw (curr_bank_core_req_rw),
|
||||
.core_req_byteen (curr_bank_core_req_byteen),
|
||||
.core_req_addr (curr_bank_core_req_addr),
|
||||
.core_req_data (curr_bank_core_req_data),
|
||||
.core_req_tag (curr_bank_core_req_tag),
|
||||
.core_req_ready (curr_bank_core_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (curr_bank_core_rsp_valid),
|
||||
.core_rsp_tid (curr_bank_core_rsp_tid),
|
||||
.core_rsp_data (curr_bank_core_rsp_data),
|
||||
.core_rsp_tag (curr_bank_core_rsp_tag),
|
||||
.core_rsp_ready (curr_bank_core_rsp_ready),
|
||||
|
||||
// Dram fill request
|
||||
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
|
||||
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
|
||||
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
|
||||
|
||||
// Dram fill response
|
||||
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
|
||||
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
|
||||
.dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready),
|
||||
|
||||
// Dram writeback request
|
||||
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
|
||||
.dram_wb_req_byteen (curr_bank_dram_wb_req_byteen),
|
||||
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
|
||||
.dram_wb_req_data (curr_bank_dram_wb_req_data),
|
||||
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (curr_bank_snp_req_valid),
|
||||
.snp_req_addr (curr_bank_snp_req_addr),
|
||||
.snp_req_invalidate (curr_bank_snp_req_invalidate),
|
||||
.snp_req_tag (curr_bank_snp_req_tag),
|
||||
.snp_req_ready (curr_bank_snp_req_ready),
|
||||
|
||||
// Snoop response
|
||||
.snp_rsp_valid (curr_bank_snp_rsp_valid),
|
||||
.snp_rsp_tag (curr_bank_snp_rsp_tag),
|
||||
.snp_rsp_ready (curr_bank_snp_rsp_ready)
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
// Snoop response
|
||||
.snp_rsp_valid (curr_bank_snp_rsp_valid),
|
||||
.snp_rsp_tag (curr_bank_snp_rsp_tag),
|
||||
.snp_rsp_ready (curr_bank_snp_rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
VX_cache_dram_req_arb #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
|
|
12
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
12
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -78,13 +78,11 @@ module VX_cache_miss_resrv #(
|
|||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||
reg [MRVQ_SIZE-1:0] valid_address_match;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < MRVQ_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||
end
|
||||
endgenerate
|
||||
genvar i;
|
||||
for (i = 0; i < MRVQ_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||
end
|
||||
|
||||
assign pending_hazard = |(valid_address_match);
|
||||
|
||||
|
|
|
@ -11,14 +11,14 @@ interface VX_alu_req_if ();
|
|||
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire [31:0] offset;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
`ifndef VX_EXECUTE_IF
|
||||
`define VX_EXECUTE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_execute_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] instr_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [31:0] imm;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire alu_ready;
|
||||
wire mul_ready;
|
||||
wire lsu_ready;
|
||||
wire csr_ready;
|
||||
wire gpu_ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
13
hw/rtl/interfaces/VX_gpr_data_if.v
Normal file
13
hw/rtl/interfaces/VX_gpr_data_if.v
Normal file
|
@ -0,0 +1,13 @@
|
|||
`ifndef VX_GPR_DATA_IF
|
||||
`define VX_GPR_DATA_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_gpr_data_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] instr;
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -6,15 +6,19 @@
|
|||
interface VX_lsu_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire rw;
|
||||
wire [`BYTEEN_BITS-1:0] byteen;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,19 +5,19 @@
|
|||
|
||||
interface VX_mul_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -6,9 +6,9 @@
|
|||
interface VX_wb_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -20,12 +20,10 @@ module VX_generic_register #(
|
|||
reg [(N-1):0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset || flush) begin
|
||||
value <= N'(0);
|
||||
end else if (~stall) begin
|
||||
value <= in;
|
||||
end else if (flush) begin
|
||||
value <= N'(0);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
module VX_generic_stack #(
|
||||
parameter WIDTH = 40,
|
||||
parameter DEPTH = 2
|
||||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
void clear() {
|
||||
for (uint32_t i = 0; i < (1 << 12); i++) {
|
||||
if (mem_[i]) {
|
||||
delete mem_[i];
|
||||
delete [] mem_[i];
|
||||
mem_[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue