RTL code refactoring

This commit is contained in:
Blaise Tine 2020-04-19 09:24:04 -04:00
parent 3139d37610
commit e9dfa828fe
33 changed files with 998 additions and 1022 deletions

View file

@ -86,7 +86,7 @@ reg[31:0] io_data;
initial begin
// $fdumpfile("vortex1.vcd");
load_file("../../runtime/tests/simple/vx_simple_main.hex");
load_file("../../runtime/tests/simple/simple_main_if.hex");
$dumpvars(0, vortex_tb);
reset = 1;
clk = 0;

View file

@ -1,130 +1,127 @@
`include "VX_define.vh"
module VX_back_end
#(
parameter CORE_ID = 0
)
(
module VX_back_end #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire schedule_delay,
VX_gpu_dcache_rsp_if vx_dcache_rsp,
VX_gpu_dcache_req_if vx_dcache_req,
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
output wire out_mem_delay,
output wire out_exec_delay,
output wire gpr_stage_delay,
VX_jal_response_if vx_jal_rsp,
VX_branch_response_if vx_branch_rsp,
output wire out_mem_delay,
output wire out_exec_delay,
output wire gpr_stage_delay,
VX_jal_response_if jal_rsp_if,
VX_branch_response_if branch_rsp_if,
VX_frE_to_bckE_req_if vx_bckE_req,
VX_wb_if vx_writeback_if,
VX_frE_to_bckE_req_if bckE_req_if,
VX_wb_if writeback_if,
VX_warp_ctl_if vx_warp_ctl
VX_warp_ctl_if warp_ctl_if
);
VX_wb_if writeback_temp_if();
assign writeback_if.wb = writeback_temp_if.wb;
assign writeback_if.rd = writeback_temp_if.rd;
assign writeback_if.write_data = writeback_temp_if.write_data;
assign writeback_if.wb_valid = writeback_temp_if.wb_valid;
assign writeback_if.wb_warp_num = writeback_temp_if.wb_warp_num;
assign writeback_if.wb_pc = writeback_temp_if.wb_pc;
VX_wb_if vx_writeback_temp();
assign vx_writeback_if.wb = vx_writeback_temp.wb;
assign vx_writeback_if.rd = vx_writeback_temp.rd;
assign vx_writeback_if.write_data = vx_writeback_temp.write_data;
assign vx_writeback_if.wb_valid = vx_writeback_temp.wb_valid;
assign vx_writeback_if.wb_warp_num = vx_writeback_temp.wb_warp_num;
assign vx_writeback_if.wb_pc = vx_writeback_temp.wb_pc;
// assign VX_writeback_if(vx_writeback_temp);
// assign VX_writeback_if(writeback_temp_if);
wire no_slot_mem;
wire no_slot_exec;
// LSU input + output
VX_lsu_req_if vx_lsu_req();
VX_inst_mem_wb_if vx_mem_wb();
VX_lsu_req_if lsu_req_if();
VX_inst_mem_wb_if mem_wb_if();
// Exec unit input + output
VX_exec_unit_req_if vx_exec_unit_req();
VX_inst_exec_wb_if vx_inst_exec_wb();
VX_exec_unit_req_if exec_unit_req_if();
VX_inst_exec_wb_if inst_exec_wb_if();
// GPU unit input
VX_gpu_inst_req_if vx_gpu_inst_req();
VX_gpu_inst_req_if gpu_inst_req_if();
// CSR unit inputs
VX_csr_req_if vx_csr_req();
VX_csr_wb_if vx_csr_wb();
VX_csr_req_if csr_req_if();
VX_csr_wb_if csr_wb_if();
wire no_slot_csr;
wire stall_gpr_csr;
VX_gpr_stage vx_gpr_stage(
VX_gpr_stage gpr_stage (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.vx_writeback_if(vx_writeback_temp),
.vx_bckE_req (vx_bckE_req),
.writeback_if (writeback_temp_if),
.bckE_req_if (bckE_req_if),
// New
.vx_exec_unit_req(vx_exec_unit_req),
.vx_lsu_req (vx_lsu_req),
.vx_gpu_inst_req (vx_gpu_inst_req),
.vx_csr_req (vx_csr_req),
.exec_unit_req_if(exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (out_mem_delay),
.exec_delay (out_exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
.memory_delay (out_mem_delay),
.exec_delay (out_exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_lsu load_store_unit (
.clk (clk),
.reset (reset),
.vx_lsu_req (vx_lsu_req),
.vx_mem_wb (vx_mem_wb),
.vx_dcache_rsp(vx_dcache_rsp),
.vx_dcache_req(vx_dcache_req),
.lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if),
.dcache_rsp_if(dcache_rsp_if),
.dcache_req_if(dcache_req_if),
.out_delay (out_mem_delay),
.no_slot_mem (no_slot_mem)
);
VX_execute_unit vx_execUnit (
VX_execute_unit execUnit (
.clk (clk),
.reset (reset),
.vx_exec_unit_req(vx_exec_unit_req),
.vx_inst_exec_wb (vx_inst_exec_wb),
.vx_jal_rsp (vx_jal_rsp),
.vx_branch_rsp (vx_branch_rsp),
.exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.out_delay (out_exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpgpu_inst vx_gpgpu_inst (
.vx_gpu_inst_req(vx_gpu_inst_req),
.vx_warp_ctl (vx_warp_ctl)
VX_gpgpu_inst gpgpu_inst (
.gpu_inst_req_if(gpu_inst_req_if),
.warp_ctl_if (warp_ctl_if)
);
// VX_csr_wrapper vx_csr_wrapper(
// .vx_csr_req(vx_csr_req),
// .vx_csr_wb (vx_csr_wb)
// );
// VX_csr_wrapper csr_wrapper(
// .csr_req_if(csr_req_if),
// .csr_wb_if (csr_wb_if)
// );
VX_csr_pipe #(
.CORE_ID(CORE_ID)
) vx_csr_pipe (
) csr_pipe (
.clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.vx_csr_req (vx_csr_req),
.vx_writeback(vx_writeback_temp),
.vx_csr_wb (vx_csr_wb),
.csr_req_if (csr_req_if),
.writeback_if(writeback_temp_if),
.csr_wb_if (csr_wb_if),
.stall_gpr_csr(stall_gpr_csr)
);
VX_writeback vx_wb (
VX_writeback wb (
.clk (clk),
.reset (reset),
.vx_mem_wb (vx_mem_wb),
.vx_inst_exec_wb (vx_inst_exec_wb),
.vx_csr_wb (vx_csr_wb),
.mem_wb_if (mem_wb_if),
.inst_exec_wb_if (inst_exec_wb_if),
.csr_wb_if (csr_wb_if),
.vx_writeback_if(vx_writeback_temp),
.writeback_if (writeback_temp_if),
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)

View file

@ -1,7 +1,7 @@
module VX_csr_handler (
input wire clk,
input wire[`CSR_ADDR_SIZE-1:0] in_decode_csr_address, // done
VX_csr_write_request_if vx_csr_w_req,
VX_csr_write_request_if csr_w_req_if,
input wire in_wb_valid,
output wire[31:0] out_decode_csr_data // done
);
@ -9,9 +9,9 @@ module VX_csr_handler (
wire[`CSR_ADDR_SIZE-1:0] in_mem_csr_address;
wire[31:0] in_mem_csr_result;
assign in_mem_is_csr = vx_csr_w_req.is_csr;
assign in_mem_csr_address = vx_csr_w_req.csr_address;
assign in_mem_csr_result = vx_csr_w_req.csr_result;
assign in_mem_is_csr = csr_w_req_if.is_csr;
assign in_mem_csr_address = csr_w_req_if.csr_address;
assign in_mem_csr_result = csr_w_req_if.csr_result;
reg [`CSR_WIDTH-1:0] csr [`NUM_CSRS-1:0];

View file

@ -3,13 +3,13 @@
module VX_csr_pipe #(
parameter CORE_ID = 0
) (
input wire clk, // Clock
input wire reset,
input wire no_slot_csr,
VX_csr_req_if vx_csr_req,
VX_wb_if vx_writeback,
VX_csr_wb_if vx_csr_wb,
output wire stall_gpr_csr
input wire clk,
input wire reset,
input wire no_slot_csr,
VX_csr_req_if csr_req_if,
VX_wb_if writeback_if,
VX_csr_wb_if csr_wb_if,
output wire stall_gpr_csr
);
wire[`NUM_THREADS-1:0] valid_s2;
@ -24,16 +24,16 @@ module VX_csr_pipe #(
wire[31:0] csr_read_data_unqual;
wire[31:0] csr_read_data;
assign stall_gpr_csr = no_slot_csr && vx_csr_req.is_csr && |(vx_csr_req.valid);
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);
assign csr_read_data = (csr_address_s2 == vx_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
wire writeback = |vx_writeback.wb_valid;
wire writeback = |writeback_if.wb_valid;
VX_csr_data vx_csr_data(
VX_csr_data csr_data(
.clk (clk),
.reset (reset),
.in_read_csr_address (vx_csr_req.csr_address),
.in_read_csr_address (csr_req_if.csr_address),
.in_write_valid (is_csr_s2),
.in_write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.in_write_csr_address(csr_address_s2),
@ -44,10 +44,10 @@ module VX_csr_pipe #(
reg [31:0] csr_updated_data;
always @(*) begin
case (vx_csr_req.alu_op)
`CSR_ALU_RW: csr_updated_data = vx_csr_req.csr_mask;
`CSR_ALU_RS: csr_updated_data = csr_read_data | vx_csr_req.csr_mask;
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - vx_csr_req.csr_mask);
case (csr_req_if.alu_op)
`CSR_ALU_RW: csr_updated_data = csr_req_if.csr_mask;
`CSR_ALU_RS: csr_updated_data = csr_read_data | csr_req_if.csr_mask;
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_req_if.csr_mask);
default: csr_updated_data = 32'hdeadbeef;
endcase
end
@ -61,7 +61,7 @@ module VX_csr_pipe #(
.reset(reset),
.stall(no_slot_csr),
.flush(zero),
.in ({vx_csr_req.valid, vx_csr_req.warp_num, vx_csr_req.rd, vx_csr_req.wb, vx_csr_req.is_csr, vx_csr_req.csr_address, csr_read_data , csr_updated_data }),
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }),
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
);
@ -97,10 +97,10 @@ module VX_csr_pipe #(
warp_id_select ? warp_idz :
csr_vec_read_data_s2;
assign vx_csr_wb.valid = valid_s2;
assign vx_csr_wb.warp_num = warp_num_s2;
assign vx_csr_wb.rd = rd_s2;
assign vx_csr_wb.wb = wb_s2;
assign vx_csr_wb.csr_result = final_csr_data;
assign csr_wb_if.valid = valid_s2;
assign csr_wb_if.warp_num = warp_num_s2;
assign csr_wb_if.rd = rd_s2;
assign csr_wb_if.wb = wb_s2;
assign csr_wb_if.csr_result = final_csr_data;
endmodule

View file

@ -2,8 +2,8 @@
`include "VX_define.vh"
module VX_csr_wrapper (
VX_csr_req_if vx_csr_req,
VX_csr_wb_if vx_csr_wb
VX_csr_req_if csr_req_if,
VX_csr_wb_if csr_wb_if
);
@ -17,21 +17,21 @@ module VX_csr_wrapper (
end
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, vx_csr_req.warp_num};
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
end
endgenerate
assign vx_csr_wb.valid = vx_csr_req.valid;
assign vx_csr_wb.warp_num = vx_csr_req.warp_num;
assign vx_csr_wb.rd = vx_csr_req.rd;
assign vx_csr_wb.wb = vx_csr_req.wb;
assign csr_wb_if.valid = csr_req_if.valid;
assign csr_wb_if.warp_num = csr_req_if.warp_num;
assign csr_wb_if.rd = csr_req_if.rd;
assign csr_wb_if.wb = csr_req_if.wb;
wire thread_select = vx_csr_req.csr_address == 12'h20;
wire warp_select = vx_csr_req.csr_address == 12'h21;
wire thread_select = csr_req_if.csr_address == 12'h20;
wire warp_select = csr_req_if.csr_address == 12'h21;
assign vx_csr_wb.csr_result = thread_select ? thread_ids :
assign csr_wb_if.csr_result = thread_select ? thread_ids :
warp_select ? warp_ids :
0;

View file

@ -3,22 +3,22 @@
module VX_decode(
// Fetch Inputs
VX_inst_meta_if fd_inst_meta_de,
VX_inst_meta_if fd_inst_meta_de,
// Outputs
VX_frE_to_bckE_req_if vx_frE_to_bckE_req,
VX_wstall_if vx_wstall,
VX_join_if vx_join,
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
VX_wstall_if wstall_if,
VX_join_if join_if,
output wire terminate_sim
output wire terminate_sim
);
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign vx_frE_to_bckE_req.curr_PC = in_curr_PC;
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
@ -84,20 +84,20 @@ module VX_decode(
reg[2:0] temp_branch_type;
reg temp_branch_stall;
assign vx_frE_to_bckE_req.valid = fd_inst_meta_de.valid;
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
assign vx_frE_to_bckE_req.warp_num = in_warp_num;
assign frE_to_bckE_req_if.warp_num = in_warp_num;
assign curr_opcode = in_instruction[6:0];
assign vx_frE_to_bckE_req.rd = in_instruction[11:7];
assign vx_frE_to_bckE_req.rs1 = in_instruction[19:15];
assign vx_frE_to_bckE_req.rs2 = in_instruction[24:20];
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
assign func3 = in_instruction[14:12];
assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20];
assign vx_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4;
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
// Write Back signal
assign is_rtype = (curr_opcode == `R_INST);
@ -123,43 +123,43 @@ module VX_decode(
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
assign vx_join.is_join = is_join;
assign vx_join.join_warp_num = in_warp_num;
assign join_if.is_join = is_join;
assign join_if.join_warp_num = in_warp_num;
assign vx_frE_to_bckE_req.is_wspawn = is_wspawn;
assign vx_frE_to_bckE_req.is_tmc = is_tmc;
assign vx_frE_to_bckE_req.is_split = is_split;
assign vx_frE_to_bckE_req.is_barrier = is_barrier;
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
assign frE_to_bckE_req_if.is_tmc = is_tmc;
assign frE_to_bckE_req_if.is_split = is_split;
assign frE_to_bckE_req_if.is_barrier = is_barrier;
assign vx_frE_to_bckE_req.csr_immed = is_csr_immed;
assign vx_frE_to_bckE_req.is_csr = is_csr;
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
assign frE_to_bckE_req_if.is_csr = is_csr;
assign vx_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`NO_WB;
assign vx_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
// MEM signals
assign vx_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
assign vx_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
// UPPER IMMEDIATE
always @(*) begin
case(curr_opcode)
`LUI_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
`AUIPC_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
`LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
`AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
default: temp_upper_immed = 20'h0;
endcase // curr_opcode
end
assign vx_frE_to_bckE_req.upper_immed = temp_upper_immed;
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
assign jal_b_19_to_12 = in_instruction[19:12];
@ -171,7 +171,7 @@ module VX_decode(
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
assign jalr_immed = {func7, vx_frE_to_bckE_req.rs2};
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
@ -208,16 +208,16 @@ module VX_decode(
endcase
end
assign vx_frE_to_bckE_req.jalQual = is_jal;
assign vx_frE_to_bckE_req.jal = temp_jal;
assign vx_frE_to_bckE_req.jal_offset = temp_jal_offset;
assign frE_to_bckE_req_if.jalQual = is_jal;
assign frE_to_bckE_req_if.jal = temp_jal;
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
// wire is_ebreak;
// assign is_ebreak = is_e_inst;
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
assign vx_frE_to_bckE_req.ebreak = ebreak;
assign frE_to_bckE_req_if.ebreak = ebreak;
assign terminate_sim = is_e_inst;
@ -226,26 +226,26 @@ module VX_decode(
assign csr_cond1 = func3 != 3'h0;
assign csr_cond2 = u_12 >= 12'h2;
assign vx_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
// ITYPE IMEED
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
assign alu_shift_i_immed = {{7{1'b0}}, vx_frE_to_bckE_req.rs2};
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
always @(*) begin
case(curr_opcode)
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, vx_frE_to_bckE_req.rd};
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
default: temp_itype_immed = 32'hdeadbeef;
endcase
end
assign vx_frE_to_bckE_req.itype_immed = temp_itype_immed;
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
always @(*) begin
case(curr_opcode)
@ -282,10 +282,10 @@ module VX_decode(
endcase
end
assign vx_frE_to_bckE_req.branch_type = temp_branch_type;
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
assign vx_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
assign vx_wstall.warp_num = in_warp_num;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
assign wstall_if.warp_num = in_warp_num;
always @(*) begin
// ALU OP
@ -330,14 +330,14 @@ module VX_decode(
wire[4:0] temp_final_alu;
assign temp_final_alu = is_btype ? ((vx_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) :
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) :
is_lui ? `LUI_ALU :
is_auipc ? `AUIPC_ALU :
is_csr ? csr_alu :
(is_stype || is_linst) ? `ADD :
alu_op;
assign vx_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
endmodule

View file

@ -5,69 +5,69 @@ module VX_dmem_controller (
input wire reset,
// Dram <-> Dcache
VX_gpu_dcache_dram_req_if vx_gpu_dcache_dram_req,
VX_gpu_dcache_dram_rsp_if vx_gpu_dcache_dram_res,
VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req,
VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_res_if,
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if,
// Dram <-> Icache
VX_gpu_dcache_dram_req_if vx_gpu_icache_dram_req,
VX_gpu_dcache_dram_rsp_if vx_gpu_icache_dram_res,
VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req,
VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_icache_dram_res_if,
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if,
// Core <-> Dcache
VX_gpu_dcache_rsp_if vx_dcache_rsp,
VX_gpu_dcache_req_if vx_dcache_req,
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
// Core <-> Icache
VX_gpu_dcache_rsp_if vx_icache_rsp,
VX_gpu_dcache_req_if vx_icache_req
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if
);
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_smem();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_smem();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_dcache();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_dcache();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
wire to_shm = vx_dcache_req.core_req_addr[0][31:24] == 8'hFF;
wire dcache_wants_wb = (|vx_dcache_rsp_dcache.core_wb_valid);
wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF;
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_wb_valid);
// Dcache Request
assign vx_dcache_req_dcache.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
assign vx_dcache_req_dcache.core_req_addr = vx_dcache_req.core_req_addr;
assign vx_dcache_req_dcache.core_req_writedata = vx_dcache_req.core_req_writedata;
assign vx_dcache_req_dcache.core_req_mem_read = vx_dcache_req.core_req_mem_read;
assign vx_dcache_req_dcache.core_req_mem_write = vx_dcache_req.core_req_mem_write;
assign vx_dcache_req_dcache.core_req_rd = vx_dcache_req.core_req_rd;
assign vx_dcache_req_dcache.core_req_wb = vx_dcache_req.core_req_wb;
assign vx_dcache_req_dcache.core_req_warp_num = vx_dcache_req.core_req_warp_num;
assign vx_dcache_req_dcache.core_req_pc = vx_dcache_req.core_req_pc;
assign vx_dcache_req_dcache.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_dcache_if.core_req_writedata = dcache_req_if.core_req_writedata;
assign dcache_req_dcache_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
assign dcache_req_dcache_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc;
assign dcache_req_dcache_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
// Shared Memory Request
assign vx_dcache_req_smem.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
assign vx_dcache_req_smem.core_req_addr = vx_dcache_req.core_req_addr;
assign vx_dcache_req_smem.core_req_writedata = vx_dcache_req.core_req_writedata;
assign vx_dcache_req_smem.core_req_mem_read = vx_dcache_req.core_req_mem_read;
assign vx_dcache_req_smem.core_req_mem_write = vx_dcache_req.core_req_mem_write;
assign vx_dcache_req_smem.core_req_rd = vx_dcache_req.core_req_rd;
assign vx_dcache_req_smem.core_req_wb = vx_dcache_req.core_req_wb;
assign vx_dcache_req_smem.core_req_warp_num = vx_dcache_req.core_req_warp_num;
assign vx_dcache_req_smem.core_req_pc = vx_dcache_req.core_req_pc;
assign vx_dcache_req_smem.core_no_wb_slot = vx_dcache_req.core_no_wb_slot || dcache_wants_wb;
assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_smem_if.core_req_writedata = dcache_req_if.core_req_writedata;
assign dcache_req_smem_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
assign dcache_req_smem_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
assign dcache_req_smem_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_smem_if.core_req_pc = dcache_req_if.core_req_pc;
assign dcache_req_smem_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot || dcache_wants_wb;
// Dcache Response
assign vx_dcache_rsp.core_wb_valid = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_valid : vx_dcache_rsp_smem.core_wb_valid;
assign vx_dcache_rsp.core_wb_req_rd = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_rd : vx_dcache_rsp_smem.core_wb_req_rd;
assign vx_dcache_rsp.core_wb_req_wb = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_wb : vx_dcache_rsp_smem.core_wb_req_wb;
assign vx_dcache_rsp.core_wb_warp_num = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_warp_num : vx_dcache_rsp_smem.core_wb_warp_num;
assign vx_dcache_rsp.core_wb_readdata = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_readdata : vx_dcache_rsp_smem.core_wb_readdata;
assign vx_dcache_rsp.core_wb_pc = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_pc : vx_dcache_rsp_smem.core_wb_pc;
assign dcache_rsp_if.core_wb_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_valid : dcache_rsp_smem_if.core_wb_valid;
assign dcache_rsp_if.core_wb_req_rd = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_rd : dcache_rsp_smem_if.core_wb_req_rd;
assign dcache_rsp_if.core_wb_req_wb = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_wb : dcache_rsp_smem_if.core_wb_req_wb;
assign dcache_rsp_if.core_wb_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_warp_num : dcache_rsp_smem_if.core_wb_warp_num;
assign dcache_rsp_if.core_wb_readdata = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_readdata : dcache_rsp_smem_if.core_wb_readdata;
assign dcache_rsp_if.core_wb_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_pc : dcache_rsp_smem_if.core_wb_pc;
assign vx_dcache_rsp.delay_req = to_shm ? vx_dcache_rsp_smem.delay_req : vx_dcache_rsp_dcache.delay_req;
assign dcache_rsp_if.delay_req = to_shm ? dcache_rsp_smem_if.delay_req : dcache_rsp_dcache_if.delay_req;
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_req();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_res();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if();
VX_cache #(
.CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES),
@ -95,46 +95,46 @@ module VX_dmem_controller (
.reset (reset),
// Core req
.core_req_valid (vx_dcache_req_smem.core_req_valid),
.core_req_mem_read (vx_dcache_req_smem.core_req_mem_read),
.core_req_mem_write(vx_dcache_req_smem.core_req_mem_write),
.core_req_addr (vx_dcache_req_smem.core_req_addr),
.core_req_writedata(vx_dcache_req_smem.core_req_writedata),
.core_req_rd (vx_dcache_req_smem.core_req_rd),
.core_req_wb (vx_dcache_req_smem.core_req_wb),
.core_req_warp_num (vx_dcache_req_smem.core_req_warp_num),
.core_req_pc (vx_dcache_req_smem.core_req_pc),
.core_req_valid (dcache_req_smem_if.core_req_valid),
.core_req_mem_read (dcache_req_smem_if.core_req_mem_read),
.core_req_mem_write(dcache_req_smem_if.core_req_mem_write),
.core_req_addr (dcache_req_smem_if.core_req_addr),
.core_req_writedata(dcache_req_smem_if.core_req_writedata),
.core_req_rd (dcache_req_smem_if.core_req_rd),
.core_req_wb (dcache_req_smem_if.core_req_wb),
.core_req_warp_num (dcache_req_smem_if.core_req_warp_num),
.core_req_pc (dcache_req_smem_if.core_req_pc),
// Delay Core Req
.delay_req (vx_dcache_rsp_smem.delay_req),
.delay_req (dcache_rsp_smem_if.delay_req),
// Core Cache Can't WB
.core_no_wb_slot (vx_dcache_req_smem.core_no_wb_slot),
.core_no_wb_slot (dcache_req_smem_if.core_no_wb_slot),
// Cache CWB
.core_wb_valid (vx_dcache_rsp_smem.core_wb_valid),
.core_wb_req_rd (vx_dcache_rsp_smem.core_wb_req_rd),
.core_wb_req_wb (vx_dcache_rsp_smem.core_wb_req_wb),
.core_wb_warp_num (vx_dcache_rsp_smem.core_wb_warp_num),
.core_wb_readdata (vx_dcache_rsp_smem.core_wb_readdata),
.core_wb_pc (vx_dcache_rsp_smem.core_wb_pc),
.core_wb_valid (dcache_rsp_smem_if.core_wb_valid),
.core_wb_req_rd (dcache_rsp_smem_if.core_wb_req_rd),
.core_wb_req_wb (dcache_rsp_smem_if.core_wb_req_wb),
.core_wb_warp_num (dcache_rsp_smem_if.core_wb_warp_num),
.core_wb_readdata (dcache_rsp_smem_if.core_wb_readdata),
.core_wb_pc (dcache_rsp_smem_if.core_wb_pc),
`IGNORE_WARNINGS_BEGIN
.core_wb_address (),
`IGNORE_WARNINGS_END
// DRAM response
.dram_rsp_valid (vx_gpu_smem_dram_res.dram_rsp_valid),
.dram_rsp_addr (vx_gpu_smem_dram_res.dram_rsp_addr),
.dram_rsp_data (vx_gpu_smem_dram_res.dram_rsp_data),
.dram_rsp_valid (gpu_smem_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_smem_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_smem_dram_res_if.dram_rsp_data),
// DRAM accept response
.dram_rsp_ready (vx_gpu_smem_dram_req.dram_rsp_ready),
.dram_rsp_ready (gpu_smem_dram_req_if.dram_rsp_ready),
// DRAM Req
.dram_req_read (vx_gpu_smem_dram_req.dram_req_read),
.dram_req_write (vx_gpu_smem_dram_req.dram_req_write),
.dram_req_addr (vx_gpu_smem_dram_req.dram_req_addr),
.dram_req_data (vx_gpu_smem_dram_req.dram_req_data),
.dram_req_read (gpu_smem_dram_req_if.dram_req_read),
.dram_req_write (gpu_smem_dram_req_if.dram_req_write),
.dram_req_addr (gpu_smem_dram_req_if.dram_req_addr),
.dram_req_data (gpu_smem_dram_req_if.dram_req_data),
.dram_req_full (1),
// Snoop Request
@ -178,52 +178,52 @@ module VX_dmem_controller (
.reset (reset),
// Core req
.core_req_valid (vx_dcache_req_dcache.core_req_valid),
.core_req_mem_read (vx_dcache_req_dcache.core_req_mem_read),
.core_req_mem_write(vx_dcache_req_dcache.core_req_mem_write),
.core_req_addr (vx_dcache_req_dcache.core_req_addr),
.core_req_writedata(vx_dcache_req_dcache.core_req_writedata),
.core_req_rd (vx_dcache_req_dcache.core_req_rd),
.core_req_wb (vx_dcache_req_dcache.core_req_wb),
.core_req_warp_num (vx_dcache_req_dcache.core_req_warp_num),
.core_req_pc (vx_dcache_req_dcache.core_req_pc),
.core_req_valid (dcache_req_dcache_if.core_req_valid),
.core_req_mem_read (dcache_req_dcache_if.core_req_mem_read),
.core_req_mem_write(dcache_req_dcache_if.core_req_mem_write),
.core_req_addr (dcache_req_dcache_if.core_req_addr),
.core_req_writedata(dcache_req_dcache_if.core_req_writedata),
.core_req_rd (dcache_req_dcache_if.core_req_rd),
.core_req_wb (dcache_req_dcache_if.core_req_wb),
.core_req_warp_num (dcache_req_dcache_if.core_req_warp_num),
.core_req_pc (dcache_req_dcache_if.core_req_pc),
// Delay Core Req
.delay_req (vx_dcache_rsp_dcache.delay_req),
.delay_req (dcache_rsp_dcache_if.delay_req),
// Core Cache Can't WB
.core_no_wb_slot (vx_dcache_req_dcache.core_no_wb_slot),
.core_no_wb_slot (dcache_req_dcache_if.core_no_wb_slot),
// Cache CWB
.core_wb_valid (vx_dcache_rsp_dcache.core_wb_valid),
.core_wb_req_rd (vx_dcache_rsp_dcache.core_wb_req_rd),
.core_wb_req_wb (vx_dcache_rsp_dcache.core_wb_req_wb),
.core_wb_warp_num (vx_dcache_rsp_dcache.core_wb_warp_num),
.core_wb_readdata (vx_dcache_rsp_dcache.core_wb_readdata),
.core_wb_pc (vx_dcache_rsp_dcache.core_wb_pc),
.core_wb_valid (dcache_rsp_dcache_if.core_wb_valid),
.core_wb_req_rd (dcache_rsp_dcache_if.core_wb_req_rd),
.core_wb_req_wb (dcache_rsp_dcache_if.core_wb_req_wb),
.core_wb_warp_num (dcache_rsp_dcache_if.core_wb_warp_num),
.core_wb_readdata (dcache_rsp_dcache_if.core_wb_readdata),
.core_wb_pc (dcache_rsp_dcache_if.core_wb_pc),
`IGNORE_WARNINGS_BEGIN
.core_wb_address (),
`IGNORE_WARNINGS_END
// DRAM response
.dram_rsp_valid (vx_gpu_dcache_dram_res.dram_rsp_valid),
.dram_rsp_addr (vx_gpu_dcache_dram_res.dram_rsp_addr),
.dram_rsp_data (vx_gpu_dcache_dram_res.dram_rsp_data),
.dram_rsp_valid (gpu_dcache_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_dcache_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_dcache_dram_res_if.dram_rsp_data),
// DRAM accept response
.dram_rsp_ready (vx_gpu_dcache_dram_req.dram_rsp_ready),
.dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready),
// DRAM Req
.dram_req_read (vx_gpu_dcache_dram_req.dram_req_read),
.dram_req_write (vx_gpu_dcache_dram_req.dram_req_write),
.dram_req_addr (vx_gpu_dcache_dram_req.dram_req_addr),
.dram_req_data (vx_gpu_dcache_dram_req.dram_req_data),
.dram_req_full (vx_gpu_dcache_dram_req.dram_req_full),
.dram_req_read (gpu_dcache_dram_req_if.dram_req_read),
.dram_req_write (gpu_dcache_dram_req_if.dram_req_write),
.dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr),
.dram_req_data (gpu_dcache_dram_req_if.dram_req_data),
.dram_req_full (gpu_dcache_dram_req_if.dram_req_full),
// Snoop Request
.snp_req_valid (vx_gpu_dcache_snp_req.snp_req_valid),
.snp_req_addr (vx_gpu_dcache_snp_req.snp_req_addr),
.snp_req_full (vx_gpu_dcache_snp_req.snp_req_full),
.snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid),
.snp_req_addr (gpu_dcache_snp_req_if.snp_req_addr),
.snp_req_full (gpu_dcache_snp_req_if.snp_req_full),
// Snoop Forward
`IGNORE_WARNINGS_BEGIN
@ -259,52 +259,52 @@ module VX_dmem_controller (
.reset (reset),
// Core req
.core_req_valid (vx_icache_req.core_req_valid),
.core_req_mem_read (vx_icache_req.core_req_mem_read),
.core_req_mem_write (vx_icache_req.core_req_mem_write),
.core_req_addr (vx_icache_req.core_req_addr),
.core_req_writedata (vx_icache_req.core_req_writedata),
.core_req_rd (vx_icache_req.core_req_rd),
.core_req_wb (vx_icache_req.core_req_wb),
.core_req_warp_num (vx_icache_req.core_req_warp_num),
.core_req_pc (vx_icache_req.core_req_pc),
.core_req_valid (icache_req_if.core_req_valid),
.core_req_mem_read (icache_req_if.core_req_mem_read),
.core_req_mem_write (icache_req_if.core_req_mem_write),
.core_req_addr (icache_req_if.core_req_addr),
.core_req_writedata (icache_req_if.core_req_writedata),
.core_req_rd (icache_req_if.core_req_rd),
.core_req_wb (icache_req_if.core_req_wb),
.core_req_warp_num (icache_req_if.core_req_warp_num),
.core_req_pc (icache_req_if.core_req_pc),
// Delay Core Req
.delay_req (vx_icache_rsp.delay_req),
.delay_req (icache_rsp_if.delay_req),
// Core Cache Can't WB
.core_no_wb_slot (vx_icache_req.core_no_wb_slot),
.core_no_wb_slot (icache_req_if.core_no_wb_slot),
// Cache CWB
.core_wb_valid (vx_icache_rsp.core_wb_valid),
.core_wb_req_rd (vx_icache_rsp.core_wb_req_rd),
.core_wb_req_wb (vx_icache_rsp.core_wb_req_wb),
.core_wb_warp_num (vx_icache_rsp.core_wb_warp_num),
.core_wb_readdata (vx_icache_rsp.core_wb_readdata),
.core_wb_pc (vx_icache_rsp.core_wb_pc),
.core_wb_valid (icache_rsp_if.core_wb_valid),
.core_wb_req_rd (icache_rsp_if.core_wb_req_rd),
.core_wb_req_wb (icache_rsp_if.core_wb_req_wb),
.core_wb_warp_num (icache_rsp_if.core_wb_warp_num),
.core_wb_readdata (icache_rsp_if.core_wb_readdata),
.core_wb_pc (icache_rsp_if.core_wb_pc),
`IGNORE_WARNINGS_BEGIN
.core_wb_address (),
`IGNORE_WARNINGS_END
// DRAM response
.dram_rsp_valid (vx_gpu_icache_dram_res.dram_rsp_valid),
.dram_rsp_addr (vx_gpu_icache_dram_res.dram_rsp_addr),
.dram_rsp_data (vx_gpu_icache_dram_res.dram_rsp_data),
.dram_rsp_valid (gpu_icache_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_icache_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_icache_dram_res_if.dram_rsp_data),
// DRAM accept response
.dram_rsp_ready (vx_gpu_icache_dram_req.dram_rsp_ready),
.dram_rsp_ready (gpu_icache_dram_req_if.dram_rsp_ready),
// DRAM Req
.dram_req_read (vx_gpu_icache_dram_req.dram_req_read),
.dram_req_write (vx_gpu_icache_dram_req.dram_req_write),
.dram_req_addr (vx_gpu_icache_dram_req.dram_req_addr),
.dram_req_data (vx_gpu_icache_dram_req.dram_req_data),
.dram_req_full (vx_gpu_icache_dram_req.dram_req_full),
.dram_req_read (gpu_icache_dram_req_if.dram_req_read),
.dram_req_write (gpu_icache_dram_req_if.dram_req_write),
.dram_req_addr (gpu_icache_dram_req_if.dram_req_addr),
.dram_req_data (gpu_icache_dram_req_if.dram_req_data),
.dram_req_full (gpu_icache_dram_req_if.dram_req_full),
// Snoop Request
.snp_req_valid (vx_gpu_icache_snp_req.snp_req_valid),
.snp_req_addr (vx_gpu_icache_snp_req.snp_req_addr),
.snp_req_full (vx_gpu_icache_snp_req.snp_req_full),
.snp_req_valid (gpu_icache_snp_req_if.snp_req_valid),
.snp_req_addr (gpu_icache_snp_req_if.snp_req_addr),
.snp_req_full (gpu_icache_snp_req_if.snp_req_full),
// Snoop Forward
`IGNORE_WARNINGS_BEGIN

View file

@ -1,21 +1,21 @@
`include "VX_define.vh"
module VX_execute_unit (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Request
VX_exec_unit_req_if vx_exec_unit_req,
VX_exec_unit_req_if exec_unit_req_if,
// Output
// Writeback
VX_inst_exec_wb_if vx_inst_exec_wb,
VX_inst_exec_wb_if inst_exec_wb_if,
// JAL Response
VX_jal_response_if vx_jal_rsp,
VX_jal_response_if jal_rsp_if,
// Branch Response
VX_branch_response_if vx_branch_rsp,
VX_branch_response_if branch_rsp_if,
input wire no_slot_exec,
output wire out_delay
input wire no_slot_exec,
output wire out_delay
);
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
@ -31,23 +31,23 @@ module VX_execute_unit (
wire[31:0] in_jal_offset;
wire[31:0] in_curr_PC;
assign in_a_reg_data = vx_exec_unit_req.a_reg_data;
assign in_b_reg_data = vx_exec_unit_req.b_reg_data;
assign in_alu_op = vx_exec_unit_req.alu_op;
assign in_rs2_src = vx_exec_unit_req.rs2_src;
assign in_itype_immed = vx_exec_unit_req.itype_immed;
assign in_branch_type = vx_exec_unit_req.branch_type;
assign in_upper_immed = vx_exec_unit_req.upper_immed;
assign in_jal = vx_exec_unit_req.jal;
assign in_jal_offset = vx_exec_unit_req.jal_offset;
assign in_curr_PC = vx_exec_unit_req.curr_PC;
assign in_a_reg_data = exec_unit_req_if.a_reg_data;
assign in_b_reg_data = exec_unit_req_if.b_reg_data;
assign in_alu_op = exec_unit_req_if.alu_op;
assign in_rs2_src = exec_unit_req_if.rs2_src;
assign in_itype_immed = exec_unit_req_if.itype_immed;
assign in_branch_type = exec_unit_req_if.branch_type;
assign in_upper_immed = exec_unit_req_if.upper_immed;
assign in_jal = exec_unit_req_if.jal;
assign in_jal_offset = exec_unit_req_if.jal_offset;
assign in_curr_PC = exec_unit_req_if.curr_PC;
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0] alu_stall;
genvar index_out_reg;
generate
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu vx_alu(
VX_alu alu(
.clk(clk),
.reset(reset),
// .in_reg_data (in_reg_data[1:0]),
@ -77,17 +77,17 @@ module VX_execute_unit (
VX_generic_priority_encoder #(
.N(`NUM_THREADS)
) choose_alu_result (
.valids(vx_exec_unit_req.valid),
.valids(exec_unit_req_if.valid),
.index (jal_branch_use_index),
.found (jal_branch_found_valid)
);
);
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
reg temp_branch_dir;
always @(*)
begin
case (vx_exec_unit_req.branch_type)
case (exec_unit_req_if.branch_type)
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
@ -104,35 +104,35 @@ module VX_execute_unit (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
assign duplicate_PC_data[i] = vx_exec_unit_req.PC_next;
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
end
endgenerate
// VX_inst_exec_wb_if vx_inst_exec_wb_temp();
// VX_inst_exec_wb_if inst_exec_wb_temp_if();
// JAL Response
VX_jal_response_if vx_jal_rsp_temp();
VX_jal_response_if jal_rsp_temp_if();
// Branch Response
VX_branch_response_if vx_branch_rsp_temp();
VX_branch_response_if branch_rsp_temp_if();
// Actual Writeback
assign vx_inst_exec_wb.rd = vx_exec_unit_req.rd;
assign vx_inst_exec_wb.wb = vx_exec_unit_req.wb;
assign vx_inst_exec_wb.wb_valid = vx_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
assign vx_inst_exec_wb.wb_warp_num = vx_exec_unit_req.warp_num;
assign vx_inst_exec_wb.alu_result = vx_exec_unit_req.jal ? duplicate_PC_data : alu_result;
assign inst_exec_wb_if.rd = exec_unit_req_if.rd;
assign inst_exec_wb_if.wb = exec_unit_req_if.wb;
assign inst_exec_wb_if.wb_valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
assign inst_exec_wb_if.wb_warp_num = exec_unit_req_if.warp_num;
assign inst_exec_wb_if.alu_result = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;
assign vx_inst_exec_wb.exec_wb_pc = in_curr_PC;
assign inst_exec_wb_if.exec_wb_pc = in_curr_PC;
// Jal rsp
assign vx_jal_rsp_temp.jal = in_jal;
assign vx_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign vx_jal_rsp_temp.jal_warp_num = vx_exec_unit_req.warp_num;
assign jal_rsp_temp_if.jal = in_jal;
assign jal_rsp_temp_if.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num;
// Branch rsp
assign vx_branch_rsp_temp.valid_branch = (vx_exec_unit_req.branch_type != `NO_BRANCH) && (|vx_exec_unit_req.valid);
assign vx_branch_rsp_temp.branch_dir = temp_branch_dir;
assign vx_branch_rsp_temp.branch_warp_num = vx_exec_unit_req.warp_num;
assign vx_branch_rsp_temp.branch_dest = $signed(vx_exec_unit_req.curr_PC) + ($signed(vx_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid);
assign branch_rsp_temp_if.branch_dir = temp_branch_dir;
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
wire zero = 0;
@ -142,8 +142,8 @@ module VX_execute_unit (
// .reset(reset),
// .stall(zero),
// .flush(zero),
// .in ({vx_inst_exec_wb_temp.rd, vx_inst_exec_wb_temp.wb, vx_inst_exec_wb_temp.wb_valid, vx_inst_exec_wb_temp.wb_warp_num, vx_inst_exec_wb_temp.alu_result, vx_inst_exec_wb_temp.exec_wb_pc}),
// .out ({vx_inst_exec_wb.rd , vx_inst_exec_wb.wb , vx_inst_exec_wb.wb_valid , vx_inst_exec_wb.wb_warp_num , vx_inst_exec_wb.alu_result , vx_inst_exec_wb.exec_wb_pc })
// .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}),
// .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc })
// );
VX_generic_register #(
@ -153,8 +153,8 @@ module VX_execute_unit (
.reset(reset),
.stall(zero),
.flush(zero),
.in ({vx_jal_rsp_temp.jal, vx_jal_rsp_temp.jal_dest, vx_jal_rsp_temp.jal_warp_num}),
.out ({vx_jal_rsp.jal , vx_jal_rsp.jal_dest , vx_jal_rsp.jal_warp_num})
.in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}),
.out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num})
);
VX_generic_register #(
@ -164,8 +164,8 @@ module VX_execute_unit (
.reset(reset),
.stall(zero),
.flush(zero),
.in ({vx_branch_rsp_temp.valid_branch, vx_branch_rsp_temp.branch_dir, vx_branch_rsp_temp.branch_warp_num, vx_branch_rsp_temp.branch_dest}),
.out ({vx_branch_rsp.valid_branch , vx_branch_rsp.branch_dir , vx_branch_rsp.branch_warp_num , vx_branch_rsp.branch_dest })
.in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}),
.out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest })
);
// always @(*) begin
@ -178,7 +178,7 @@ module VX_execute_unit (
// end
// assign out_is_csr = vx_exec_unit_req.is_csr;
// assign out_csr_address = vx_exec_unit_req.csr_address;
// assign out_is_csr = exec_unit_req_if.is_csr;
// assign out_csr_address = exec_unit_req_if.csr_address;
endmodule : VX_execute_unit

View file

@ -1,20 +1,20 @@
`include "VX_define.vh"
module VX_fetch (
input wire clk,
input wire reset,
VX_wstall_if vx_wstall,
VX_join_if vx_join,
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
input wire clk,
input wire reset,
VX_wstall_if wstall_if,
VX_join_if join_if,
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire out_ebreak,
VX_jal_response_if vx_jal_rsp,
VX_branch_response_if vx_branch_rsp,
output wire out_ebreak,
VX_jal_response_if jal_rsp_if,
VX_branch_response_if branch_rsp_if,
VX_inst_meta_if fe_inst_meta_fi,
VX_warp_ctl_if vx_warp_ctl
VX_warp_ctl_if warp_ctl_if
);
wire[`NUM_THREADS-1:0] thread_mask;
@ -22,15 +22,12 @@ module VX_fetch (
wire[31:0] warp_pc;
wire scheduled_warp;
wire pipe_stall;
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
// Locals
assign pipe_stall = schedule_delay || icache_stage_delay;
VX_warp_scheduler warp_scheduler(
@ -38,52 +35,52 @@ module VX_fetch (
.reset (reset),
.stall (pipe_stall),
.is_barrier (vx_warp_ctl.is_barrier),
.barrier_id (vx_warp_ctl.barrier_id),
.num_warps (vx_warp_ctl.num_warps),
.barrier_warp_num (vx_warp_ctl.warp_num),
.is_barrier (warp_ctl_if.is_barrier),
.barrier_id (warp_ctl_if.barrier_id),
.num_warps (warp_ctl_if.num_warps),
.barrier_warp_num (warp_ctl_if.warp_num),
// Wspawn
.wspawn (vx_warp_ctl.wspawn),
.wsapwn_pc (vx_warp_ctl.wspawn_pc),
.wspawn_new_active(vx_warp_ctl.wspawn_new_active),
.wspawn (warp_ctl_if.wspawn),
.wsapwn_pc (warp_ctl_if.wspawn_pc),
.wspawn_new_active(warp_ctl_if.wspawn_new_active),
// CTM
.ctm (vx_warp_ctl.change_mask),
.ctm_mask (vx_warp_ctl.thread_mask),
.ctm_warp_num (vx_warp_ctl.warp_num),
.ctm (warp_ctl_if.change_mask),
.ctm_mask (warp_ctl_if.thread_mask),
.ctm_warp_num (warp_ctl_if.warp_num),
// WHALT
.whalt (vx_warp_ctl.ebreak),
.whalt_warp_num (vx_warp_ctl.warp_num),
.whalt (warp_ctl_if.ebreak),
.whalt_warp_num (warp_ctl_if.warp_num),
// Wstall
.wstall (vx_wstall.wstall),
.wstall_warp_num (vx_wstall.warp_num),
.wstall (wstall_if.wstall),
.wstall_warp_num (wstall_if.warp_num),
// Lock/release Stuff
.icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid),
// Join
.is_join (vx_join.is_join),
.join_warp_num (vx_join.join_warp_num),
.is_join (join_if.is_join),
.join_warp_num (join_if.join_warp_num),
// Split
.is_split (vx_warp_ctl.is_split),
.dont_split (vx_warp_ctl.dont_split),
.split_new_mask (vx_warp_ctl.split_new_mask),
.split_later_mask (vx_warp_ctl.split_later_mask),
.split_save_pc (vx_warp_ctl.split_save_pc),
.split_warp_num (vx_warp_ctl.warp_num),
.is_split (warp_ctl_if.is_split),
.dont_split (warp_ctl_if.dont_split),
.split_new_mask (warp_ctl_if.split_new_mask),
.split_later_mask (warp_ctl_if.split_later_mask),
.split_save_pc (warp_ctl_if.split_save_pc),
.split_warp_num (warp_ctl_if.warp_num),
// JAL
.jal (vx_jal_rsp.jal),
.jal_dest (vx_jal_rsp.jal_dest),
.jal_warp_num (vx_jal_rsp.jal_warp_num),
.jal (jal_rsp_if.jal),
.jal_dest (jal_rsp_if.jal_dest),
.jal_warp_num (jal_rsp_if.jal_warp_num),
// Branch
.branch_valid (vx_branch_rsp.valid_branch),
.branch_dir (vx_branch_rsp.branch_dir),
.branch_dest (vx_branch_rsp.branch_dest),
.branch_warp_num (vx_branch_rsp.branch_warp_num),
.branch_valid (branch_rsp_if.valid_branch),
.branch_dir (branch_rsp_if.branch_dir),
.branch_dest (branch_rsp_if.branch_dest),
.branch_warp_num (branch_rsp_if.branch_warp_num),
// Outputs
.thread_mask (thread_mask),

View file

@ -6,73 +6,68 @@ module VX_front_end (
input wire schedule_delay,
VX_warp_ctl_if vx_warp_ctl,
VX_warp_ctl_if warp_ctl_if,
VX_gpu_dcache_rsp_if vx_icache_rsp,
VX_gpu_dcache_req_if vx_icache_req,
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if,
VX_jal_response_if vx_jal_rsp,
VX_branch_response_if vx_branch_rsp,
VX_jal_response_if jal_rsp_if,
VX_branch_response_if branch_rsp_if,
VX_frE_to_bckE_req_if vx_bckE_req,
VX_frE_to_bckE_req_if bckE_req_if,
output wire fetch_ebreak
);
VX_inst_meta_if fe_inst_meta_fi();
VX_inst_meta_if fe_inst_meta_fi2();
VX_inst_meta_if fe_inst_meta_id();
VX_inst_meta_if fe_inst_meta_fi();
VX_inst_meta_if fe_inst_meta_fi2();
VX_inst_meta_if fe_inst_meta_id();
VX_frE_to_bckE_req_if frE_to_bckE_req_if();
VX_inst_meta_if fd_inst_meta_de();
VX_frE_to_bckE_req_if vx_frE_to_bckE_req();
VX_inst_meta_if fd_inst_meta_de();
wire total_freeze = schedule_delay;
wire icache_stage_delay;
wire total_freeze = schedule_delay;
wire icache_stage_delay;
wire vortex_ebreak;
wire terminate_sim;
wire vortex_ebreak;
wire terminate_sim;
wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids;
wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids;
reg old_ebreak; // This should be eventually removed
always @(posedge clk) begin
if (reset) begin
old_ebreak <= 0;
end else begin
old_ebreak <= old_ebreak || fetch_ebreak;
reg old_ebreak; // This should be eventually removed
always @(posedge clk) begin
if (reset) begin
old_ebreak <= 0;
end else begin
old_ebreak <= old_ebreak || fetch_ebreak;
end
end
end
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_wstall_if vx_wstall();
VX_join_if vx_join();
VX_fetch vx_fetch(
VX_fetch fetch(
.clk (clk),
.reset (reset),
.icache_stage_wid (icache_stage_wid),
.icache_stage_valids(icache_stage_valids),
.vx_wstall (vx_wstall),
.vx_join (vx_join),
.wstall_if (wstall_if),
.join_if (join_if),
.schedule_delay (schedule_delay),
.vx_jal_rsp (vx_jal_rsp),
.vx_warp_ctl (vx_warp_ctl),
.jal_rsp_if (jal_rsp_if),
.warp_ctl_if (warp_ctl_if),
.icache_stage_delay (icache_stage_delay),
.vx_branch_rsp (vx_branch_rsp),
.branch_rsp_if (branch_rsp_if),
.out_ebreak (vortex_ebreak), // fetch_ebreak
.fe_inst_meta_fi (fe_inst_meta_fi)
);
wire freeze_fi_reg = total_freeze || icache_stage_delay;
wire freeze_fi_reg = total_freeze || icache_stage_delay;
VX_f_d_reg vx_f_i_reg(
VX_f_d_reg f_i_reg(
.clk (clk),
.reset (reset),
.in_freeze (freeze_fi_reg),
@ -80,46 +75,46 @@ VX_f_d_reg vx_f_i_reg(
.fd_inst_meta_de(fe_inst_meta_fi2)
);
VX_icache_stage vx_icache_stage(
.clk (clk),
.reset (reset),
.total_freeze (total_freeze),
.icache_stage_delay (icache_stage_delay),
.icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid),
.fe_inst_meta_fi (fe_inst_meta_fi2),
.fe_inst_meta_id (fe_inst_meta_id),
.vx_icache_rsp (vx_icache_rsp),
.vx_icache_req (vx_icache_req)
VX_icache_stage icache_stage(
.clk (clk),
.reset (reset),
.total_freeze (total_freeze),
.icache_stage_delay (icache_stage_delay),
.icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid),
.fe_inst_meta_fi (fe_inst_meta_fi2),
.fe_inst_meta_id (fe_inst_meta_id),
.icache_rsp_if (icache_rsp_if),
.icache_req_if (icache_req_if)
);
VX_i_d_reg vx_i_d_reg(
.clk (clk),
.reset (reset),
.in_freeze (total_freeze),
.fe_inst_meta_fd(fe_inst_meta_id),
.fd_inst_meta_de(fd_inst_meta_de)
VX_i_d_reg i_d_reg(
.clk (clk),
.reset (reset),
.in_freeze (total_freeze),
.fe_inst_meta_fd (fe_inst_meta_id),
.fd_inst_meta_de (fd_inst_meta_de)
);
VX_decode vx_decode(
.fd_inst_meta_de (fd_inst_meta_de),
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
.vx_wstall (vx_wstall),
.vx_join (vx_join),
.terminate_sim (terminate_sim)
VX_decode decode(
.fd_inst_meta_de (fd_inst_meta_de),
.frE_to_bckE_req_if (frE_to_bckE_req_if),
.wstall_if (wstall_if),
.join_if (join_if),
.terminate_sim (terminate_sim)
);
wire no_br_stall = 0;
wire no_br_stall = 0;
VX_d_e_reg vx_d_e_reg(
.clk (clk),
.reset (reset),
.in_branch_stall(no_br_stall),
.in_freeze (total_freeze),
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
.vx_bckE_req (vx_bckE_req)
VX_d_e_reg d_e_reg(
.clk (clk),
.reset (reset),
.in_branch_stall (no_br_stall),
.in_freeze (total_freeze),
.frE_to_bckE_req_if (frE_to_bckE_req_if),
.bckE_req_if (bckE_req_if)
);
endmodule

View file

@ -2,57 +2,57 @@
module VX_gpgpu_inst (
// Input
VX_gpu_inst_req_if vx_gpu_inst_req,
VX_gpu_inst_req_if gpu_inst_req_if,
// Output
VX_warp_ctl_if vx_warp_ctl
VX_warp_ctl_if warp_ctl_if
);
wire[`NUM_THREADS-1:0] curr_valids = vx_gpu_inst_req.valid;
wire is_split = (vx_gpu_inst_req.is_split);
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
wire is_split = (gpu_inst_req_if.is_split);
wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < vx_gpu_inst_req.a_reg_data[0];
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
genvar curr_t;
generate
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < vx_gpu_inst_req.a_reg_data[0];
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
wire valid_inst = (|curr_valids);
assign vx_warp_ctl.warp_num = vx_gpu_inst_req.warp_num;
assign vx_warp_ctl.change_mask = (vx_gpu_inst_req.is_tmc) && valid_inst;
assign vx_warp_ctl.thread_mask = vx_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
// assign vx_warp_ctl.ebreak = (vx_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
assign vx_warp_ctl.ebreak = vx_warp_ctl.change_mask && (vx_warp_ctl.thread_mask == 0);
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
wire wspawn = vx_gpu_inst_req.is_wspawn;
wire[31:0] wspawn_pc = vx_gpu_inst_req.rd2;
wire all_active = `NUM_WARPS < vx_gpu_inst_req.a_reg_data[0];
wire wspawn = gpu_inst_req_if.is_wspawn;
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w;
generate
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < vx_gpu_inst_req.a_reg_data[0];
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
assign vx_warp_ctl.is_barrier = vx_gpu_inst_req.is_barrier && valid_inst;
assign vx_warp_ctl.barrier_id = vx_gpu_inst_req.a_reg_data[0];
assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];
`DEBUG_BEGIN
wire[31:0] num_warps_m1 = vx_gpu_inst_req.rd2 - 1;
wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
`DEBUG_END
assign vx_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
assign vx_warp_ctl.wspawn = wspawn;
assign vx_warp_ctl.wspawn_pc = wspawn_pc;
assign vx_warp_ctl.wspawn_new_active = wspawn_new_active;
assign warp_ctl_if.wspawn = wspawn;
assign warp_ctl_if.wspawn_pc = wspawn_pc;
assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
wire[`NUM_THREADS-1:0] split_new_use_mask;
wire[`NUM_THREADS-1:0] split_new_later_mask;
@ -61,7 +61,7 @@ module VX_gpgpu_inst (
genvar curr_s_t;
generate
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (vx_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
@ -79,15 +79,15 @@ module VX_gpgpu_inst (
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
assign vx_warp_ctl.is_split = is_split && (num_valids > 1);
assign vx_warp_ctl.dont_split = vx_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign vx_warp_ctl.split_new_mask = split_new_use_mask;
assign vx_warp_ctl.split_later_mask = split_new_later_mask;
assign vx_warp_ctl.split_save_pc = vx_gpu_inst_req.pc_next;
assign vx_warp_ctl.split_warp_num = vx_gpu_inst_req.warp_num;
assign warp_ctl_if.is_split = is_split && (num_valids > 1);
assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign warp_ctl_if.split_new_mask = split_new_use_mask;
assign warp_ctl_if.split_later_mask = split_new_later_mask;
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next;
assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num;
// vx_gpu_inst_req.is_wspawn
// vx_gpu_inst_req.is_split
// vx_gpu_inst_req.is_barrier
// gpu_inst_req_if.is_wspawn
// gpu_inst_req_if.is_split
// gpu_inst_req_if.is_barrier
endmodule

View file

@ -4,8 +4,8 @@ module VX_gpr (
input wire clk,
input wire reset,
input wire valid_write_request,
VX_gpr_read_if vx_gpr_read,
VX_wb_if vx_writeback_if,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_a_reg_data,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_b_reg_data
@ -13,36 +13,36 @@ module VX_gpr (
wire write_enable;
`ifndef ASIC
assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0)) && (vx_writeback_if.rd != 0);
assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0);
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.reset (reset),
.waddr (vx_writeback_if.rd),
.raddr1(vx_gpr_read.rs1),
.raddr2(vx_gpr_read.rs2),
.be (vx_writeback_if.wb_valid),
.wdata (vx_writeback_if.write_data),
.waddr (writeback_if.rd),
.raddr1(gpr_read_if.rs1),
.raddr2(gpr_read_if.rs2),
.be (writeback_if.wb_valid),
.wdata (writeback_if.write_data),
.q1 (out_a_reg_data),
.q2 (out_b_reg_data)
);
`else
assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0));
wire going_to_write = write_enable & (|vx_writeback_if.wb_valid);
assign write_enable = valid_write_request && ((writeback_if.wb != 0));
wire going_to_write = write_enable & (|writeback_if.wb_valid);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
wire local_write = write_enable & vx_writeback_if.wb_valid[curr_t];
wire local_write = write_enable & writeback_if.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
end
// wire cenb = !going_to_write;
wire cenb = 0;
// wire cena_1 = (vx_gpr_read.rs1 == 0);
// wire cena_2 = (vx_gpr_read.rs2 == 0);
// wire cena_1 = (gpr_read_if.rs1 == 0);
// wire cena_2 = (gpr_read_if.rs2 == 0);
wire cena_1 = 0;
wire cena_2 = 0;
@ -65,7 +65,7 @@ module VX_gpr (
assign out_b_reg_data = temp_b;
`endif
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (vx_writeback_if.rd != 0) ? vx_writeback_if.write_data : 0;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
@ -82,11 +82,11 @@ module VX_gpr (
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(vx_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),
@ -121,11 +121,11 @@ module VX_gpr (
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(vx_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),

View file

@ -12,72 +12,72 @@ module VX_gpr_stage (
// inputs
// Instruction Information
VX_frE_to_bckE_req_if vx_bckE_req,
VX_frE_to_bckE_req_if bckE_req_if,
// WriteBack inputs
VX_wb_if vx_writeback_if,
VX_wb_if writeback_if,
// Outputs
VX_exec_unit_req_if vx_exec_unit_req,
VX_lsu_req_if vx_lsu_req,
VX_gpu_inst_req_if vx_gpu_inst_req,
VX_csr_req_if vx_csr_req
VX_exec_unit_req_if exec_unit_req_if,
VX_lsu_req_if lsu_req_if,
VX_gpu_inst_req_if gpu_inst_req_if,
VX_csr_req_if csr_req_if
);
`DEBUG_BEGIN
wire[31:0] curr_PC = vx_bckE_req.curr_PC;
wire[2:0] branchType = vx_bckE_req.branch_type;
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
wire jalQual = vx_bckE_req.jalQual;
wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[2:0] branchType = bckE_req_if.branch_type;
wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE);
wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ);
wire jalQual = bckE_req_if.jalQual;
`DEBUG_END
VX_gpr_read_if vx_gpr_read();
assign vx_gpr_read.rs1 = vx_bckE_req.rs1;
assign vx_gpr_read.rs2 = vx_bckE_req.rs2;
assign vx_gpr_read.warp_num = vx_bckE_req.warp_num;
VX_gpr_read_if gpr_read_if();
assign gpr_read_if.rs1 = bckE_req_if.rs1;
assign gpr_read_if.rs2 = bckE_req_if.rs2;
assign gpr_read_if.warp_num = bckE_req_if.warp_num;
`ifndef ASIC
VX_gpr_jal_if vx_gpr_jal();
assign vx_gpr_jal.is_jal = vx_bckE_req.jalQual;
assign vx_gpr_jal.curr_PC = vx_bckE_req.curr_PC;
VX_gpr_jal_if gpr_jal_if();
assign gpr_jal_if.is_jal = bckE_req_if.jalQual;
assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC;
`else
VX_gpr_jal_if vx_gpr_jal();
assign vx_gpr_jal.is_jal = vx_exec_unit_req.jalQual;
assign vx_gpr_jal.curr_PC = vx_exec_unit_req.curr_PC;
VX_gpr_jal_if gpr_jal_if();
assign gpr_jal_if.is_jal = exec_unit_req_if.jalQual;
assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC;
`endif
VX_gpr_data_if vx_gpr_datf();
VX_gpr_data_if gpr_datf_if();
VX_gpr_wrapper vx_grp_wrapper (
VX_gpr_wrapper grp_wrapper (
.clk (clk),
.reset (reset),
.vx_writeback_if(vx_writeback_if),
.vx_gpr_read (vx_gpr_read),
.vx_gpr_jal (vx_gpr_jal),
.writeback_if(writeback_if),
.gpr_read_if (gpr_read_if),
.gpr_jal_if (gpr_jal_if),
.out_a_reg_data (vx_gpr_datf.a_reg_data),
.out_b_reg_data (vx_gpr_datf.b_reg_data)
.out_a_reg_data (gpr_datf_if.a_reg_data),
.out_b_reg_data (gpr_datf_if.b_reg_data)
);
// assign vx_bckE_req.is_csr = is_csr;
// assign vx_bckE_req_out.csr_mask = (vx_bckE_req.sr_immed == 1'b1) ? {27'h0, vx_bckE_req.rs1} : vx_gpr_data.a_reg_data[0];
// assign bckE_req_if.is_csr = is_csr;
// assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];
// Outputs
VX_exec_unit_req_if vx_exec_unit_req_temp();
VX_lsu_req_if vx_lsu_req_temp();
VX_gpu_inst_req_if vx_gpu_inst_req_temp();
VX_csr_req_if vx_csr_req_temp();
VX_exec_unit_req_if exec_unit_req_temp_if();
VX_lsu_req_if lsu_req_temp_if();
VX_gpu_inst_req_if gpu_inst_req_temp_if();
VX_csr_req_if csr_req_temp_if();
VX_inst_multiplex vx_inst_mult(
.vx_bckE_req (vx_bckE_req),
.vx_gpr_data (vx_gpr_datf),
.vx_exec_unit_req(vx_exec_unit_req_temp),
.vx_lsu_req (vx_lsu_req_temp),
.vx_gpu_inst_req (vx_gpu_inst_req_temp),
.vx_csr_req (vx_csr_req_temp)
VX_inst_multiplex inst_mult(
.bckE_req_if (bckE_req_if),
.gpr_data_if (gpr_datf_if),
.exec_unit_req_if(exec_unit_req_temp_if),
.lsu_req_if (lsu_req_temp_if),
.gpu_inst_req_if (gpu_inst_req_temp_if),
.csr_req_if (csr_req_temp_if)
);
`DEBUG_BEGIN
wire is_lsu = (|vx_lsu_req_temp.valid);
wire is_lsu = (|lsu_req_temp_if.valid);
`DEBUG_END
wire stall_rest = 0;
wire flush_rest = schedule_delay;
@ -88,7 +88,7 @@ module VX_gpr_stage (
wire stall_exec = exec_delay;
wire flush_exec = schedule_delay && !stall_exec;
wire stall_csr = stall_gpr_csr && vx_bckE_req.is_csr && (|vx_bckE_req.valid);
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid);
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
@ -125,11 +125,11 @@ module VX_gpr_stage (
.out ({temp_store_data, temp_base_address})
);
assign real_store_data = vx_lsu_req_temp.store_data;
assign real_base_address = vx_lsu_req_temp.base_address;
assign real_store_data = lsu_req_temp_if.store_data;
assign real_base_address = lsu_req_temp_if.base_address;
assign vx_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
assign vx_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
assign lsu_req_if.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
assign lsu_req_if.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
VX_generic_register #(
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
@ -138,8 +138,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc ,vx_lsu_req.warp_num , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
@ -149,12 +149,12 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_exec),
.flush(flush_exec),
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
assign vx_exec_unit_req.a_reg_data = real_base_address;
assign vx_exec_unit_req.b_reg_data = real_store_data;
assign exec_unit_req_if.a_reg_data = real_base_address;
assign exec_unit_req_if.b_reg_data = real_store_data;
VX_generic_register #(
.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
@ -163,12 +163,12 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next}),
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next })
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next })
);
assign vx_gpu_inst_req.a_reg_data = real_base_address;
assign vx_gpu_inst_req.rd2 = real_store_data;
assign gpu_inst_req_if.a_reg_data = real_base_address;
assign gpu_inst_req_if.rd2 = real_store_data;
VX_generic_register #(
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
@ -177,8 +177,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask })
);
`else
@ -191,8 +191,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.store_data, vx_lsu_req_temp.base_address, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc , vx_lsu_req.warp_num , vx_lsu_req.store_data , vx_lsu_req.base_address , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
@ -202,8 +202,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_exec),
.flush(flush_exec),
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.a_reg_data, vx_exec_unit_req_temp.b_reg_data, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.a_reg_data , vx_exec_unit_req.b_reg_data , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
VX_generic_register #(
@ -213,8 +213,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next, vx_gpu_inst_req_temp.a_reg_data, vx_gpu_inst_req_temp.rd2}),
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next , vx_gpu_inst_req.a_reg_data , vx_gpu_inst_req.rd2 })
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
);
VX_generic_register #(
@ -224,8 +224,8 @@ module VX_gpr_stage (
.reset(reset),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask })
);
`endif

View file

@ -3,9 +3,9 @@
module VX_gpr_wrapper (
input wire clk,
input wire reset,
VX_gpr_read_if vx_gpr_read,
VX_wb_if vx_writeback_if,
VX_gpr_jal_if vx_gpr_jal,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
VX_gpr_jal_if gpr_jal_if,
output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
@ -19,13 +19,13 @@ module VX_gpr_wrapper (
genvar index;
generate
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
assign jal_data[index] = vx_gpr_jal.curr_PC;
assign jal_data[index] = gpr_jal_if.curr_PC;
end
endgenerate
`ifndef ASIC
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[vx_gpr_read.warp_num]));
assign out_b_reg_data = (temp_b_reg_data[vx_gpr_read.warp_num]);
assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
assign out_b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]);
`else
wire zer = 0;
@ -38,31 +38,29 @@ module VX_gpr_wrapper (
.reset(reset),
.stall(zer),
.flush(zer),
.in (vx_gpr_read.warp_num),
.in (gpr_read_if.warp_num),
.out (old_warp_num)
);
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
assign out_b_reg_data = (temp_b_reg_data[old_warp_num]);
`endif
genvar warp_index;
generate
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
wire valid_write_request = warp_index == vx_writeback_if.wb_warp_num;
VX_gpr vx_gpr(
wire valid_write_request = warp_index == writeback_if.wb_warp_num;
VX_gpr gpr(
.clk (clk),
.reset (reset),
.valid_write_request(valid_write_request),
.vx_gpr_read (vx_gpr_read),
.vx_writeback_if (vx_writeback_if),
.gpr_read_if (gpr_read_if),
.writeback_if (writeback_if),
.out_a_reg_data (temp_a_reg_data[warp_index]),
.out_b_reg_data (temp_b_reg_data[warp_index])
);
);
end
endgenerate

View file

@ -1,17 +1,17 @@
`include "VX_define.vh"
module VX_icache_stage (
input wire clk,
input wire reset,
input wire total_freeze,
output wire icache_stage_delay,
input wire clk,
input wire reset,
input wire total_freeze,
output wire icache_stage_delay,
output wire[`NW_BITS-1:0] icache_stage_wid,
output wire[`NUM_THREADS-1:0] icache_stage_valids,
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
VX_gpu_dcache_rsp_if vx_icache_rsp,
VX_gpu_dcache_req_if vx_icache_req
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if
);
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
@ -19,30 +19,30 @@ module VX_icache_stage (
wire valid_inst = (|fe_inst_meta_fi.valid);
// Icache Request
assign vx_icache_req.core_req_valid = valid_inst && !total_freeze;
assign vx_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc;
assign vx_icache_req.core_req_writedata = 32'b0;
assign vx_icache_req.core_req_mem_read = `LW_MEM_READ;
assign vx_icache_req.core_req_mem_write = `NO_MEM_WRITE;
assign vx_icache_req.core_req_rd = 5'b0;
assign vx_icache_req.core_req_wb = {1{2'b1}};
assign vx_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num;
assign vx_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_writedata = 32'b0;
assign icache_req_if.core_req_mem_read = `LW_MEM_READ;
assign icache_req_if.core_req_mem_write = `NO_MEM_WRITE;
assign icache_req_if.core_req_rd = 5'b0;
assign icache_req_if.core_req_wb = {1{2'b1}};
assign icache_req_if.core_req_warp_num = fe_inst_meta_fi.warp_num;
assign icache_req_if.core_req_pc = fe_inst_meta_fi.inst_pc;
assign fe_inst_meta_id.instruction = vx_icache_rsp.core_wb_readdata[0][31:0];
assign fe_inst_meta_id.inst_pc = vx_icache_rsp.core_wb_pc[0];
assign fe_inst_meta_id.warp_num = vx_icache_rsp.core_wb_warp_num;
assign fe_inst_meta_id.instruction = icache_rsp_if.core_wb_readdata[0][31:0];
assign fe_inst_meta_id.inst_pc = icache_rsp_if.core_wb_pc[0];
assign fe_inst_meta_id.warp_num = icache_rsp_if.core_wb_warp_num;
assign fe_inst_meta_id.valid = vx_icache_rsp.core_wb_valid ? threads_active[vx_icache_rsp.core_wb_warp_num] : 0;
assign fe_inst_meta_id.valid = icache_rsp_if.core_wb_valid ? threads_active[icache_rsp_if.core_wb_warp_num] : 0;
assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
// Cache can't accept request
assign icache_stage_delay = vx_icache_rsp.delay_req;
assign icache_stage_delay = icache_rsp_if.delay_req;
// Core can't accept response
assign vx_icache_req.core_no_wb_slot = total_freeze;
assign icache_req_if.core_no_wb_slot = total_freeze;
integer curr_w;
always @(posedge clk) begin

View file

@ -2,23 +2,23 @@
module VX_inst_multiplex (
// Inputs
VX_frE_to_bckE_req_if vx_bckE_req,
VX_gpr_data_if vx_gpr_data,
VX_frE_to_bckE_req_if bckE_req_if,
VX_gpr_data_if gpr_data_if,
// Outputs
VX_exec_unit_req_if vx_exec_unit_req,
VX_lsu_req_if vx_lsu_req,
VX_gpu_inst_req_if vx_gpu_inst_req,
VX_csr_req_if vx_csr_req
VX_exec_unit_req_if exec_unit_req_if,
VX_lsu_req_if lsu_req_if,
VX_gpu_inst_req_if gpu_inst_req_if,
VX_csr_req_if csr_req_if
);
wire[`NUM_THREADS-1:0] is_mem_mask;
wire[`NUM_THREADS-1:0] is_gpu_mask;
wire[`NUM_THREADS-1:0] is_csr_mask;
wire is_mem = (vx_bckE_req.mem_write != `NO_MEM_WRITE) || (vx_bckE_req.mem_read != `NO_MEM_READ);
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
wire is_csr = vx_bckE_req.is_csr;
wire is_mem = (bckE_req_if.mem_write != `NO_MEM_WRITE) || (bckE_req_if.mem_read != `NO_MEM_READ);
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
wire is_csr = bckE_req_if.is_csr;
// wire is_gpu = 0;
genvar currT;
@ -31,64 +31,64 @@ module VX_inst_multiplex (
endgenerate
// LSU Unit
assign vx_lsu_req.valid = vx_bckE_req.valid & is_mem_mask;
assign vx_lsu_req.warp_num = vx_bckE_req.warp_num;
assign vx_lsu_req.base_address = vx_gpr_data.a_reg_data;
assign vx_lsu_req.store_data = vx_gpr_data.b_reg_data;
assign lsu_req_if.valid = bckE_req_if.valid & is_mem_mask;
assign lsu_req_if.warp_num = bckE_req_if.warp_num;
assign lsu_req_if.base_address = gpr_data_if.a_reg_data;
assign lsu_req_if.store_data = gpr_data_if.b_reg_data;
assign vx_lsu_req.offset = vx_bckE_req.itype_immed;
assign lsu_req_if.offset = bckE_req_if.itype_immed;
assign vx_lsu_req.mem_read = vx_bckE_req.mem_read;
assign vx_lsu_req.mem_write = vx_bckE_req.mem_write;
assign vx_lsu_req.rd = vx_bckE_req.rd;
assign vx_lsu_req.wb = vx_bckE_req.wb;
assign vx_lsu_req.lsu_pc = vx_bckE_req.curr_PC;
assign lsu_req_if.mem_read = bckE_req_if.mem_read;
assign lsu_req_if.mem_write = bckE_req_if.mem_write;
assign lsu_req_if.rd = bckE_req_if.rd;
assign lsu_req_if.wb = bckE_req_if.wb;
assign lsu_req_if.lsu_pc = bckE_req_if.curr_PC;
// Execute Unit
assign vx_exec_unit_req.valid = vx_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
assign vx_exec_unit_req.warp_num = vx_bckE_req.warp_num;
assign vx_exec_unit_req.curr_PC = vx_bckE_req.curr_PC;
assign vx_exec_unit_req.PC_next = vx_bckE_req.PC_next;
assign vx_exec_unit_req.rd = vx_bckE_req.rd;
assign vx_exec_unit_req.wb = vx_bckE_req.wb;
assign vx_exec_unit_req.a_reg_data = vx_gpr_data.a_reg_data;
assign vx_exec_unit_req.b_reg_data = vx_gpr_data.b_reg_data;
assign vx_exec_unit_req.alu_op = vx_bckE_req.alu_op;
assign vx_exec_unit_req.rs1 = vx_bckE_req.rs1;
assign vx_exec_unit_req.rs2 = vx_bckE_req.rs2;
assign vx_exec_unit_req.rs2_src = vx_bckE_req.rs2_src;
assign vx_exec_unit_req.itype_immed = vx_bckE_req.itype_immed;
assign vx_exec_unit_req.upper_immed = vx_bckE_req.upper_immed;
assign vx_exec_unit_req.branch_type = vx_bckE_req.branch_type;
assign vx_exec_unit_req.jalQual = vx_bckE_req.jalQual;
assign vx_exec_unit_req.jal = vx_bckE_req.jal;
assign vx_exec_unit_req.jal_offset = vx_bckE_req.jal_offset;
assign vx_exec_unit_req.ebreak = vx_bckE_req.ebreak;
assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
assign exec_unit_req_if.warp_num = bckE_req_if.warp_num;
assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC;
assign exec_unit_req_if.PC_next = bckE_req_if.PC_next;
assign exec_unit_req_if.rd = bckE_req_if.rd;
assign exec_unit_req_if.wb = bckE_req_if.wb;
assign exec_unit_req_if.a_reg_data = gpr_data_if.a_reg_data;
assign exec_unit_req_if.b_reg_data = gpr_data_if.b_reg_data;
assign exec_unit_req_if.alu_op = bckE_req_if.alu_op;
assign exec_unit_req_if.rs1 = bckE_req_if.rs1;
assign exec_unit_req_if.rs2 = bckE_req_if.rs2;
assign exec_unit_req_if.rs2_src = bckE_req_if.rs2_src;
assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
assign exec_unit_req_if.jal = bckE_req_if.jal;
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
assign exec_unit_req_if.ebreak = bckE_req_if.ebreak;
// GPR Req
assign vx_gpu_inst_req.valid = vx_bckE_req.valid & is_gpu_mask;
assign vx_gpu_inst_req.warp_num = vx_bckE_req.warp_num;
assign vx_gpu_inst_req.is_wspawn = vx_bckE_req.is_wspawn;
assign vx_gpu_inst_req.is_tmc = vx_bckE_req.is_tmc;
assign vx_gpu_inst_req.is_split = vx_bckE_req.is_split;
assign vx_gpu_inst_req.is_barrier = vx_bckE_req.is_barrier;
assign vx_gpu_inst_req.a_reg_data = vx_gpr_data.a_reg_data;
assign vx_gpu_inst_req.rd2 = vx_gpr_data.b_reg_data[0];
assign vx_gpu_inst_req.pc_next = vx_bckE_req.PC_next;
assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num;
assign gpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn;
assign gpu_inst_req_if.is_tmc = bckE_req_if.is_tmc;
assign gpu_inst_req_if.is_split = bckE_req_if.is_split;
assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0];
assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next;
// CSR Req
assign vx_csr_req.valid = vx_bckE_req.valid & is_csr_mask;
assign vx_csr_req.warp_num = vx_bckE_req.warp_num;
assign vx_csr_req.rd = vx_bckE_req.rd;
assign vx_csr_req.wb = vx_bckE_req.wb;
assign vx_csr_req.alu_op = vx_bckE_req.alu_op;
assign vx_csr_req.is_csr = vx_bckE_req.is_csr;
assign vx_csr_req.csr_address = vx_bckE_req.csr_address;
assign vx_csr_req.csr_immed = vx_bckE_req.csr_immed;
assign vx_csr_req.csr_mask = vx_bckE_req.csr_mask;
assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask;
assign csr_req_if.warp_num = bckE_req_if.warp_num;
assign csr_req_if.rd = bckE_req_if.rd;
assign csr_req_if.wb = bckE_req_if.wb;
assign csr_req_if.alu_op = bckE_req_if.alu_op;
assign csr_req_if.is_csr = bckE_req_if.is_csr;
assign csr_req_if.csr_address = bckE_req_if.csr_address;
assign csr_req_if.csr_immed = bckE_req_if.csr_immed;
assign csr_req_if.csr_mask = bckE_req_if.csr_mask;
endmodule

View file

@ -1,23 +1,23 @@
`include "VX_define.vh"
module VX_lsu (
input wire clk,
input wire reset,
input wire no_slot_mem,
VX_lsu_req_if vx_lsu_req,
input wire clk,
input wire reset,
input wire no_slot_mem,
VX_lsu_req_if lsu_req_if,
// Write back to GPR
VX_inst_mem_wb_if vx_mem_wb,
VX_inst_mem_wb_if mem_wb_if,
VX_gpu_dcache_rsp_if vx_dcache_rsp,
VX_gpu_dcache_req_if vx_dcache_req,
output wire out_delay
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
output wire out_delay
);
// Generate Addresses
wire[`NUM_THREADS-1:0][31:0] address;
VX_lsu_addr_gen VX_lsu_addr_gen (
.base_address (vx_lsu_req.base_address),
.offset (vx_lsu_req.offset),
.base_address (lsu_req_if.base_address),
.offset (lsu_req_if.offset),
.address (address)
);
@ -40,33 +40,33 @@ module VX_lsu (
.reset(reset),
.stall(out_delay),
.flush(zero),
.in ({address , vx_lsu_req.store_data, vx_lsu_req.valid, vx_lsu_req.mem_read, vx_lsu_req.mem_write, vx_lsu_req.rd, vx_lsu_req.warp_num, vx_lsu_req.wb, vx_lsu_req.lsu_pc}),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
);
// Core Request
assign vx_dcache_req.core_req_valid = use_valid;
assign vx_dcache_req.core_req_addr = use_address;
assign vx_dcache_req.core_req_writedata = use_store_data;
assign vx_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
assign vx_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
assign vx_dcache_req.core_req_rd = use_rd;
assign vx_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
assign vx_dcache_req.core_req_warp_num = use_warp_num;
assign vx_dcache_req.core_req_pc = use_pc;
assign dcache_req_if.core_req_valid = use_valid;
assign dcache_req_if.core_req_addr = use_address;
assign dcache_req_if.core_req_writedata = use_store_data;
assign dcache_req_if.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
assign dcache_req_if.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
assign dcache_req_if.core_req_rd = use_rd;
assign dcache_req_if.core_req_wb = {`NUM_THREADS{use_wb}};
assign dcache_req_if.core_req_warp_num = use_warp_num;
assign dcache_req_if.core_req_pc = use_pc;
// Core can't accept response
assign vx_dcache_req.core_no_wb_slot = no_slot_mem;
assign dcache_req_if.core_no_wb_slot = no_slot_mem;
// Cache can't accept request
assign out_delay = vx_dcache_rsp.delay_req;
assign out_delay = dcache_rsp_if.delay_req;
// Core Response
assign vx_mem_wb.rd = vx_dcache_rsp.core_wb_req_rd;
assign vx_mem_wb.wb = vx_dcache_rsp.core_wb_req_wb;
assign vx_mem_wb.wb_valid = vx_dcache_rsp.core_wb_valid;
assign vx_mem_wb.wb_warp_num = vx_dcache_rsp.core_wb_warp_num;
assign vx_mem_wb.loaded_data = vx_dcache_rsp.core_wb_readdata;
assign mem_wb_if.rd = dcache_rsp_if.core_wb_req_rd;
assign mem_wb_if.wb = dcache_rsp_if.core_wb_req_wb;
assign mem_wb_if.wb_valid = dcache_rsp_if.core_wb_valid;
assign mem_wb_if.wb_warp_num = dcache_rsp_if.core_wb_warp_num;
assign mem_wb_if.loaded_data = dcache_rsp_if.core_wb_readdata;
wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
@ -75,12 +75,12 @@ module VX_lsu (
`DEBUG_END
VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
.valids(vx_dcache_rsp.core_wb_valid),
.valids(dcache_rsp_if.core_wb_valid),
.index (use_pc_index),
.found (found)
);
assign vx_mem_wb.mem_wb_pc = vx_dcache_rsp.core_wb_pc[use_pc_index];
assign mem_wb_if.mem_wb_pc = dcache_rsp_if.core_wb_pc[use_pc_index];
endmodule // Memory

View file

@ -1,13 +1,13 @@
`include "VX_define.vh"
module VX_scheduler (
input wire clk,
input wire reset,
input wire memory_delay,
input wire exec_delay,
input wire gpr_stage_delay,
VX_frE_to_bckE_req_if vx_bckE_req,
VX_wb_if vx_writeback_if,
input wire clk,
input wire reset,
input wire memory_delay,
input wire exec_delay,
input wire gpr_stage_delay,
VX_frE_to_bckE_req_if bckE_req_if,
VX_wb_if writeback_if,
output wire schedule_delay,
output wire is_empty
@ -18,31 +18,31 @@ module VX_scheduler (
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
wire valid_wb = (vx_writeback_if.wb != 0) && (|vx_writeback_if.wb_valid) && (vx_writeback_if.rd != 0);
wire wb_inc = (vx_bckE_req.wb != 0) && (vx_bckE_req.rd != 0);
wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.wb_valid) && (writeback_if.rd != 0);
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
wire rs1_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs1] != 0;
wire rs2_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs2] != 0;
wire rd_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd ] != 0;
wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0;
wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0;
wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0;
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE);
wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ);
// classify our next instruction.
wire is_mem = is_store || is_load;
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
wire is_csr = vx_bckE_req.is_csr;
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
wire is_csr = bckE_req_if.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr;
wire using_rs2 = (vx_bckE_req.rs2_src == `RS2_REG) || is_store || vx_bckE_req.is_barrier || vx_bckE_req.is_wspawn;
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
wire rs1_rename_qual = ((rs1_rename) && (vx_bckE_req.rs1 != 0));
wire rs2_rename_qual = ((rs2_rename) && (vx_bckE_req.rs2 != 0 && using_rs2));
wire rd_rename_qual = ((rd_rename ) && (vx_bckE_req.rd != 0));
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
wire rd_rename_qual = ((rd_rename ) && (bckE_req_if.rd != 0));
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = ((rename_valid) && (|vx_bckE_req.valid))
assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid))
|| (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec);
@ -59,15 +59,15 @@ module VX_scheduler (
end
end else begin
if (valid_wb) begin
rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] <= rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & (~vx_writeback_if.wb_valid);
rename_table[writeback_if.wb_warp_num][writeback_if.rd] <= rename_table[writeback_if.wb_warp_num][writeback_if.rd] & (~writeback_if.wb_valid);
end
if (!schedule_delay && wb_inc) begin
rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd] <= vx_bckE_req.valid;
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
end
if (valid_wb
&& (0 == (rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & ~vx_writeback_if.wb_valid))) begin
&& (0 == (rename_table[writeback_if.wb_warp_num][writeback_if.rd] & ~writeback_if.wb_valid))) begin
count_valid <= count_valid - 1;
end

View file

@ -1,67 +1,66 @@
`include "VX_define.vh"
module VX_writeback (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Mem WB info
VX_inst_mem_wb_if vx_mem_wb,
VX_inst_mem_wb_if mem_wb_if,
// EXEC Unit WB info
VX_inst_exec_wb_if vx_inst_exec_wb,
VX_inst_exec_wb_if inst_exec_wb_if,
// CSR Unit WB info
VX_csr_wb_if vx_csr_wb,
VX_csr_wb_if csr_wb_if,
// Actual WB to GPR
VX_wb_if vx_writeback_if,
output wire no_slot_mem,
output wire no_slot_exec,
output wire no_slot_csr
VX_wb_if writeback_if,
output wire no_slot_mem,
output wire no_slot_exec,
output wire no_slot_csr
);
VX_wb_if vx_writeback_tempp();
VX_wb_if writeback_tempp_if();
wire exec_wb = (vx_inst_exec_wb.wb != 0) && (|vx_inst_exec_wb.wb_valid);
wire mem_wb = (vx_mem_wb.wb != 0) && (|vx_mem_wb.wb_valid);
wire csr_wb = (vx_csr_wb.wb != 0) && (|vx_csr_wb.valid);
wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.wb_valid);
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid);
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && (exec_wb);
assign no_slot_exec = 0;
assign vx_writeback_tempp.write_data = exec_wb ? vx_inst_exec_wb.alu_result :
csr_wb ? vx_csr_wb.csr_result :
mem_wb ? vx_mem_wb.loaded_data :
assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result :
csr_wb ? csr_wb_if.csr_result :
mem_wb ? mem_wb_if.loaded_data :
0;
assign vx_writeback_tempp.wb_valid = exec_wb ? vx_inst_exec_wb.wb_valid :
csr_wb ? vx_csr_wb.valid :
mem_wb ? vx_mem_wb.wb_valid :
assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid :
csr_wb ? csr_wb_if.valid :
mem_wb ? mem_wb_if.wb_valid :
0;
assign vx_writeback_tempp.rd = exec_wb ? vx_inst_exec_wb.rd :
csr_wb ? vx_csr_wb.rd :
mem_wb ? vx_mem_wb.rd :
assign writeback_tempp_if.rd = exec_wb ? inst_exec_wb_if.rd :
csr_wb ? csr_wb_if.rd :
mem_wb ? mem_wb_if.rd :
0;
assign vx_writeback_tempp.wb = exec_wb ? vx_inst_exec_wb.wb :
csr_wb ? vx_csr_wb.wb :
mem_wb ? vx_mem_wb.wb :
assign writeback_tempp_if.wb = exec_wb ? inst_exec_wb_if.wb :
csr_wb ? csr_wb_if.wb :
mem_wb ? mem_wb_if.wb :
0;
assign vx_writeback_tempp.wb_warp_num = exec_wb ? vx_inst_exec_wb.wb_warp_num :
csr_wb ? vx_csr_wb.warp_num :
mem_wb ? vx_mem_wb.wb_warp_num :
assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num :
csr_wb ? csr_wb_if.warp_num :
mem_wb ? mem_wb_if.wb_warp_num :
0;
assign vx_writeback_tempp.wb_pc = exec_wb ? vx_inst_exec_wb.exec_wb_pc :
assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc :
csr_wb ? 32'hdeadbeef :
mem_wb ? vx_mem_wb.mem_wb_pc :
mem_wb ? mem_wb_if.mem_wb_pc :
32'hdeadbeef;
wire zero = 0;
wire[`NUM_THREADS-1:0][31:0] use_wb_data;
@ -71,19 +70,19 @@ module VX_writeback (
.reset(reset),
.stall(zero),
.flush(zero),
.in ({vx_writeback_tempp.write_data, vx_writeback_tempp.wb_valid, vx_writeback_tempp.rd, vx_writeback_tempp.wb, vx_writeback_tempp.wb_warp_num, vx_writeback_tempp.wb_pc}),
.out ({use_wb_data , vx_writeback_if.wb_valid, vx_writeback_if.rd, vx_writeback_if.wb, vx_writeback_if.wb_warp_num, vx_writeback_if.wb_pc})
.in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}),
.out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc})
);
reg[31:0] last_data_wb /* verilator public */ ;
always @(posedge clk) begin
if ((|vx_writeback_if.wb_valid) && (vx_writeback_if.wb != 0) && (vx_writeback_if.rd == 28)) begin
if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
last_data_wb <= use_wb_data[0];
end
end
assign vx_writeback_if.write_data = use_wb_data;
assign writeback_if.write_data = use_wb_data;
endmodule : VX_writeback

View file

@ -99,106 +99,106 @@ module Vortex
wire schedule_delay;
// Dcache Interface
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_qual();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_req();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_res();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_res_if();
assign vx_gpu_dcache_dram_res.dram_rsp_valid = dram_rsp_valid;
assign vx_gpu_dcache_dram_res.dram_rsp_addr = dram_rsp_addr;
assign gpu_dcache_dram_res_if.dram_rsp_valid = dram_rsp_valid;
assign gpu_dcache_dram_res_if.dram_rsp_addr = dram_rsp_addr;
assign dram_req_write = vx_gpu_dcache_dram_req.dram_req_write;
assign dram_req_read = vx_gpu_dcache_dram_req.dram_req_read;
assign dram_req_addr = vx_gpu_dcache_dram_req.dram_req_addr;
assign dram_rsp_ready = vx_gpu_dcache_dram_req.dram_rsp_ready;
assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write;
assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read;
assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr;
assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready;
assign vx_gpu_dcache_dram_req.dram_req_full = dram_req_full;
assign gpu_dcache_dram_req_if.dram_req_full = dram_req_full;
genvar i;
generate
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
assign vx_gpu_dcache_dram_res.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = vx_gpu_dcache_dram_req.dram_req_data[i];
assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
end
endgenerate
wire temp_io_valid = (!memory_delay)
&& (|vx_dcache_req.core_req_valid)
&& (vx_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
&& (vx_dcache_req.core_req_addr[0] == 32'h00010000);
&& (|dcache_req_if.core_req_valid)
&& (dcache_req_if.core_req_mem_write[0] != `NO_MEM_WRITE)
&& (dcache_req_if.core_req_addr[0] == 32'h00010000);
wire[31:0] temp_io_data = vx_dcache_req.core_req_writedata[0];
wire[31:0] temp_io_data = dcache_req_if.core_req_writedata[0];
assign io_valid = temp_io_valid;
assign io_data = temp_io_data;
assign vx_dcache_req_qual.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
assign vx_dcache_req_qual.core_req_addr = vx_dcache_req.core_req_addr;
assign vx_dcache_req_qual.core_req_writedata = vx_dcache_req.core_req_writedata;
assign vx_dcache_req_qual.core_req_mem_read = vx_dcache_req.core_req_mem_read;
assign vx_dcache_req_qual.core_req_mem_write = vx_dcache_req.core_req_mem_write;
assign vx_dcache_req_qual.core_req_rd = vx_dcache_req.core_req_rd;
assign vx_dcache_req_qual.core_req_wb = vx_dcache_req.core_req_wb;
assign vx_dcache_req_qual.core_req_warp_num = vx_dcache_req.core_req_warp_num;
assign vx_dcache_req_qual.core_req_pc = vx_dcache_req.core_req_pc;
assign vx_dcache_req_qual.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_qual_if.core_req_writedata = dcache_req_if.core_req_writedata;
assign dcache_req_qual_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
assign dcache_req_qual_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc;
assign dcache_req_qual_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_rsp();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_req();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_req();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_res();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_res_if();
assign vx_gpu_icache_dram_res.dram_rsp_valid = I_dram_rsp_valid;
assign vx_gpu_icache_dram_res.dram_rsp_addr = I_dram_rsp_addr;
assign gpu_icache_dram_res_if.dram_rsp_valid = I_dram_rsp_valid;
assign gpu_icache_dram_res_if.dram_rsp_addr = I_dram_rsp_addr;
assign I_dram_req_write = vx_gpu_icache_dram_req.dram_req_write;
assign I_dram_req_read = vx_gpu_icache_dram_req.dram_req_read;
assign I_dram_req_addr = vx_gpu_icache_dram_req.dram_req_addr;
assign I_dram_rsp_ready = vx_gpu_icache_dram_req.dram_rsp_ready;
assign I_dram_req_write = gpu_icache_dram_req_if.dram_req_write;
assign I_dram_req_read = gpu_icache_dram_req_if.dram_req_read;
assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr;
assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready;
assign vx_gpu_icache_dram_req.dram_req_full = I_dram_req_full;
assign gpu_icache_dram_req_if.dram_req_full = I_dram_req_full;
genvar j;
generate
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
assign vx_gpu_icache_dram_res.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
assign I_dram_req_data[j * 32 +: 32] = vx_gpu_icache_dram_req.dram_req_data[j];
assign gpu_icache_dram_res_if.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
assign I_dram_req_data[j * 32 +: 32] = gpu_icache_dram_req_if.dram_req_data[j];
end
endgenerate
/////////////////////////////////////////////////////////////////////////
// Front-end to Back-end
VX_frE_to_bckE_req_if vx_bckE_req(); // New instruction request to EXE/MEM
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
// Back-end to Front-end
VX_wb_if vx_writeback_if(); // Writeback to GPRs
VX_branch_response_if vx_branch_rsp(); // Branch Resolution to Fetch
VX_jal_response_if vx_jal_rsp(); // Jump resolution to Fetch
VX_wb_if writeback_if(); // Writeback to GPRs
VX_branch_response_if branch_rsp_if(); // Branch Resolution to Fetch
VX_jal_response_if jal_rsp_if(); // Jump resolution to Fetch
// CSR Buses
// VX_csr_write_request_if vx_csr_w_req();
// VX_csr_write_request_if csr_w_req_if();
VX_warp_ctl_if vx_warp_ctl();
VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req();
VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req();
VX_warp_ctl_if warp_ctl_if();
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if();
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if();
assign vx_gpu_dcache_snp_req.snp_req_valid = snp_req_valid;
assign vx_gpu_dcache_snp_req.snp_req_addr = snp_req_addr;
assign snp_req_full = vx_gpu_dcache_snp_req.snp_req_full;
assign gpu_dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign gpu_dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign snp_req_full = gpu_dcache_snp_req_if.snp_req_full;
VX_front_end vx_front_end(
VX_front_end front_end(
.clk (clk),
.reset (reset),
.vx_warp_ctl (vx_warp_ctl),
.vx_bckE_req (vx_bckE_req),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.schedule_delay (schedule_delay),
.vx_icache_rsp (vx_icache_rsp),
.vx_icache_req (vx_icache_req),
.vx_jal_rsp (vx_jal_rsp),
.vx_branch_rsp (vx_branch_rsp),
.icache_rsp_if (icache_rsp_if),
.icache_req_if (icache_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.fetch_ebreak (out_ebreak)
);
@ -208,56 +208,56 @@ VX_scheduler schedule(
.memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay),
.vx_bckE_req (vx_bckE_req),
.vx_writeback_if (vx_writeback_if),
.bckE_req_if (bckE_req_if),
.writeback_if (writeback_if),
.schedule_delay (schedule_delay),
.is_empty (scheduler_empty)
);
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
VX_back_end #(.CORE_ID(CORE_ID)) back_end(
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.vx_warp_ctl (vx_warp_ctl),
.vx_bckE_req (vx_bckE_req),
.vx_jal_rsp (vx_jal_rsp),
.vx_branch_rsp (vx_branch_rsp),
.vx_dcache_rsp (vx_dcache_rsp),
.vx_dcache_req (vx_dcache_req),
.vx_writeback_if (vx_writeback_if),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.writeback_if (writeback_if),
.out_mem_delay (memory_delay),
.out_exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_dmem_controller vx_dmem_controller(
VX_dmem_controller dmem_controller(
.clk (clk),
.reset (reset),
// Dram <-> Dcache
.vx_gpu_dcache_dram_req (vx_gpu_dcache_dram_req),
.vx_gpu_dcache_dram_res (vx_gpu_dcache_dram_res),
.vx_gpu_dcache_snp_req (vx_gpu_dcache_snp_req),
.gpu_dcache_dram_req_if (gpu_dcache_dram_req_if),
.gpu_dcache_dram_res_if (gpu_dcache_dram_res_if),
.gpu_dcache_snp_req_if (gpu_dcache_snp_req_if),
// Dram <-> Icache
.vx_gpu_icache_dram_req (vx_gpu_icache_dram_req),
.vx_gpu_icache_dram_res (vx_gpu_icache_dram_res),
.vx_gpu_icache_snp_req (vx_gpu_icache_snp_req),
.gpu_icache_dram_req_if (gpu_icache_dram_req_if),
.gpu_icache_dram_res_if (gpu_icache_dram_res_if),
.gpu_icache_snp_req_if (gpu_icache_snp_req_if),
// Core <-> Icache
.vx_icache_req (vx_icache_req),
.vx_icache_rsp (vx_icache_rsp),
.icache_req_if (icache_req_if),
.icache_rsp_if (icache_rsp_if),
// Core <-> Dcache
.vx_dcache_req (vx_dcache_req_qual),
.vx_dcache_rsp (vx_dcache_rsp)
.dcache_req_if (dcache_req_qual_if),
.dcache_rsp_if (dcache_rsp_if)
);
// VX_csr_handler vx_csr_handler(
// VX_csr_handler csr_handler(
// .clk (clk),
// .in_decode_csr_address(decode_csr_address),
// .vx_csr_w_req (vx_csr_w_req),
// .in_wb_valid (vx_writeback_if.wb_valid[0]),
// .csr_w_req_if (csr_w_req_if),
// .in_wb_valid (writeback_if.wb_valid[0]),
// .out_decode_csr_data (csr_decode_csr_data)
// );

View file

@ -449,7 +449,7 @@ module VX_bank #(
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
) vx_tag_data_access (
) tag_data_access (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
@ -477,7 +477,7 @@ module VX_bank #(
.miss_st1e (miss_st1e),
.dirty_st1e (dirty_st1e),
.fill_saw_dirty_st1e(fill_saw_dirty_st1e)
);
);
wire qual_valid_st1e_2 = valid_st1[STAGE_1_CYCLES-1] && !is_fill_st1[STAGE_1_CYCLES-1];
@ -581,7 +581,7 @@ module VX_bank #(
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
) vx_fill_invalidator (
) fill_invalidator (
.clk (clk),
.reset (reset),
.possible_fill (possible_fill),

View file

@ -157,7 +157,7 @@ module VX_cache #(
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE),
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
) vx_cache_dram_req_arb (
) cache_dram_req_arb (
.clk (clk),
.reset (reset),
.dfqq_full (dfqq_full),
@ -191,7 +191,7 @@ module VX_cache #(
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
) vx_cache_core_req_bank_sell (
) cache_core_req_bank_sell (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
.per_bank_valids (per_bank_valids)
@ -215,7 +215,7 @@ module VX_cache #(
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
) vx_cache_core_wb_sel_merge (
) cache_core_wb_sel_merge (
.per_bank_wb_valid (per_bank_wb_valid),
.per_bank_wb_tid (per_bank_wb_tid),
.per_bank_wb_rd (per_bank_wb_rd),
@ -239,7 +239,7 @@ module VX_cache #(
// Snoop Forward Logic
VX_snp_fwd_arb #(
.NUM_BANKS(NUM_BANKS)
) vx_snp_fwd_arb(
) snp_fwd_arb(
.per_bank_snp_fwd (per_bank_snp_fwd),
.per_bank_snp_fwd_addr(per_bank_snp_fwd_addr),
.per_bank_snp_fwd_pop (per_bank_snp_fwd_pop),

View file

@ -94,7 +94,7 @@ module VX_cache_dfq_queue #(
VX_generic_priority_encoder #(
.N(NUM_BANKS)
) vx_sel_bank (
) sel_bank (
.valids(qual_bank_dram_fill_req),
.index (qual_request_index),
.found (qual_has_request)

View file

@ -102,7 +102,7 @@ module VX_cache_dram_req_arb #(
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_full; // If no dwb, and dfqq has valids, then pop
wire dfqq_push = (|per_bank_dram_fill_req_valid);
VX_cache_dfq_queue vx_cache_dfq_queue(
VX_cache_dfq_queue cache_dfq_queue(
.clk (clk),
.reset (reset),
.dfqq_push (dfqq_push),
@ -121,7 +121,7 @@ module VX_cache_dram_req_arb #(
VX_generic_priority_encoder #(
.N(NUM_BANKS)
) vx_sel_dwb (
) sel_dwb (
.valids(use_wb_valid),
.index (dwb_bank),
.found (dwb_valid)

View file

@ -142,10 +142,11 @@ module VX_cache_req_queue #(
assign qual_pc = use_per_pc;
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
wire qual_has_request;
wire qual_has_request;
VX_generic_priority_encoder #(
.N(NUM_REQUESTS)
) vx_sel_bank (
) sel_bank (
.valids(qual_valids),
.index (qual_request_index),
.found (qual_has_request)

View file

@ -80,7 +80,7 @@ module VX_cache_wb_sel_merge #(
VX_generic_priority_encoder #(
.N(NUM_BANKS)
) vx_sel_bank (
) sel_bank (
.valids(per_bank_wb_valid),
.index (main_bank_index),
.found (found_bank)

View file

@ -1,7 +1,6 @@
`include "VX_cache_config.vh"
module VX_dcache_llv_resp_bank_sel
#(
module VX_dcache_llv_resp_bank_sel #(
// Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes
@ -15,8 +14,7 @@ module VX_dcache_llv_resp_bank_sel
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
@ -26,7 +24,7 @@ module VX_dcache_llv_resp_bank_sel
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
@ -39,12 +37,9 @@ module VX_dcache_llv_resp_bank_sel
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs
// Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
)
(
) (
output reg [NUM_BANKS-1:0] per_bank_llvq_pop,
input wire[NUM_BANKS-1:0] per_bank_llvq_valid,
input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_rsp_addr,
@ -55,20 +50,19 @@ module VX_dcache_llv_resp_bank_sel
output reg[NUM_REQUESTS-1:0] llvq_valid,
output reg[NUM_REQUESTS-1:0][31:0] llvq_rsp_addr,
output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data
);
wire [(`LOG2UP(NUM_BANKS))-1:0] main_bank_index;
wire found_bank;
wire found_bank;
VX_generic_priority_encoder #(.N(NUM_BANKS)) vx_sel_bank(
.valids(per_bank_llvq_valid),
.index (main_bank_index),
.found (found_bank)
VX_generic_priority_encoder #(
.N(NUM_BANKS)
) sel_bank(
.valids(per_bank_llvq_valid),
.index (main_bank_index),
.found (found_bank)
);
always @(*) begin
llvq_valid = 0;
llvq_rsp_addr = 0;

View file

@ -86,7 +86,7 @@ module VX_fill_invalidator
VX_generic_priority_encoder #(
.N(FILL_INVALIDAOR_SIZE)
) vx_sel_bank (
) sel_bank (
.valids(~fills_active),
.index (enqueue_index),
.found (enqueue_found)
@ -136,7 +136,7 @@ module VX_fill_invalidator
// wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
// wire enqueue_found;
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) vx_sel_bank(
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) sel_bank(
// .valids(~fills_active),
// .index (enqueue_index),
// .found (enqueue_found)

View file

@ -22,7 +22,7 @@ module VX_snp_fwd_arb
VX_generic_priority_encoder #(
.N(NUM_BANKS)
) vx_sel_ffsq(
) sel_ffsq (
.valids(qual_per_bank_snp_fwd),
.index (fsq_bank),
.found (fsq_valid)

View file

@ -110,7 +110,7 @@ module VX_tag_data_access #(
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
) vx_tag_data_structure (
) tag_data_structure (
.clk (clk),
.reset (reset),
.stall_bank_pipe(stall_bank_pipe),

View file

@ -1,12 +1,12 @@
`include "../VX_define.vh"
module VX_d_e_reg (
input wire clk,
input wire reset,
input wire in_branch_stall,
input wire in_freeze,
VX_frE_to_bckE_req_if vx_frE_to_bckE_req,
VX_frE_to_bckE_req_if vx_bckE_req
input wire clk,
input wire reset,
input wire in_branch_stall,
input wire in_freeze,
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
VX_frE_to_bckE_req_if bckE_req_if
);
wire stall = in_freeze;
@ -19,8 +19,8 @@ module VX_d_e_reg (
.reset (reset),
.stall (stall),
.flush (flush),
.in ({vx_frE_to_bckE_req.csr_address, vx_frE_to_bckE_req.jalQual, vx_frE_to_bckE_req.ebreak, vx_frE_to_bckE_req.is_csr, vx_frE_to_bckE_req.csr_immed, vx_frE_to_bckE_req.csr_mask, vx_frE_to_bckE_req.rd, vx_frE_to_bckE_req.rs1, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.alu_op, vx_frE_to_bckE_req.wb, vx_frE_to_bckE_req.rs2_src, vx_frE_to_bckE_req.itype_immed, vx_frE_to_bckE_req.mem_read, vx_frE_to_bckE_req.mem_write, vx_frE_to_bckE_req.branch_type, vx_frE_to_bckE_req.upper_immed, vx_frE_to_bckE_req.curr_PC, vx_frE_to_bckE_req.jal, vx_frE_to_bckE_req.jal_offset, vx_frE_to_bckE_req.PC_next, vx_frE_to_bckE_req.valid, vx_frE_to_bckE_req.warp_num, vx_frE_to_bckE_req.is_wspawn, vx_frE_to_bckE_req.is_tmc, vx_frE_to_bckE_req.is_split, vx_frE_to_bckE_req.is_barrier}),
.out ({vx_bckE_req.csr_address , vx_bckE_req.jalQual , vx_bckE_req.ebreak ,vx_bckE_req.is_csr , vx_bckE_req.csr_immed , vx_bckE_req.csr_mask , vx_bckE_req.rd , vx_bckE_req.rs1 , vx_bckE_req.rs2 , vx_bckE_req.alu_op , vx_bckE_req.wb , vx_bckE_req.rs2_src , vx_bckE_req.itype_immed , vx_bckE_req.mem_read , vx_bckE_req.mem_write , vx_bckE_req.branch_type , vx_bckE_req.upper_immed , vx_bckE_req.curr_PC , vx_bckE_req.jal , vx_bckE_req.jal_offset , vx_bckE_req.PC_next , vx_bckE_req.valid , vx_bckE_req.warp_num , vx_bckE_req.is_wspawn , vx_bckE_req.is_tmc , vx_bckE_req.is_split , vx_bckE_req.is_barrier })
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
);
endmodule

View file

@ -28,21 +28,22 @@ module VX_priority_encoder_sm
reg[`NUM_THREADS-1:0] left_requests;
reg[`NUM_THREADS-1:0] serviced;
wire[`NUM_THREADS-1:0] use_valid;
wire requests_left = (|left_requests);
assign use_valid = (requests_left) ? left_requests : in_valid;
wire[NB:0][`NUM_THREADS-1:0] bank_valids;
VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid(
VX_bank_valids #(
.NB(NB),
.BITS_PER_BANK(BITS_PER_BANK)
) bank_valid (
.in_valids(use_valid),
.in_addr(in_address),
.bank_valids(bank_valids)
);
);
wire[NB:0] more_than_one_valid;
@ -73,11 +74,13 @@ module VX_priority_encoder_sm
generate
for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1) begin : encoders
VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder(
VX_generic_priority_encoder #(
.N(NUM_REQ)
) priority_encoder (
.valids(bank_valids[curr_bank_o]),
.index(internal_req_num[curr_bank_o]),
.found(internal_out_valid[curr_bank_o])
);
);
assign out_address[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_address[internal_req_num[curr_bank_o]] : 0;
assign out_data[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_data[internal_req_num[curr_bank_o]] : 0;
end
@ -91,11 +94,9 @@ module VX_priority_encoder_sm
end
end
assign req_num = internal_req_num;
assign out_valid = internal_out_valid;
wire[`NUM_THREADS-1:0] serviced_qual = in_valid & (serviced);
wire[`NUM_THREADS-1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual);

View file

@ -1,23 +1,21 @@
`include "../VX_define.vh"
module VX_shared_memory
#(
parameter SM_SIZE = 4096, // Bytes
parameter SM_BANKS = 4,
parameter SM_BYTES_PER_READ = 16,
parameter SM_WORDS_PER_READ = 4,
parameter SM_LOG_WORDS_PER_READ = 2,
parameter SM_HEIGHT = 128, // Bytes
parameter SM_BANK_OFFSET_START = 2,
parameter SM_BANK_OFFSET_END = 4,
parameter SM_BLOCK_OFFSET_START = 5,
parameter SM_BLOCK_OFFSET_END = 6,
parameter SM_INDEX_START = 7,
parameter SM_INDEX_END = 13,
parameter NUM_REQ = 4,
parameter BITS_PER_BANK = 3
)
(
module VX_shared_memory #(
parameter SM_SIZE = 4096, // Bytes
parameter SM_BANKS = 4,
parameter SM_BYTES_PER_READ = 16,
parameter SM_WORDS_PER_READ = 4,
parameter SM_LOG_WORDS_PER_READ = 2,
parameter SM_HEIGHT = 128, // Bytes
parameter SM_BANK_OFFSET_START = 2,
parameter SM_BANK_OFFSET_END = 4,
parameter SM_BLOCK_OFFSET_START = 5,
parameter SM_BLOCK_OFFSET_END = 6,
parameter SM_INDEX_START = 7,
parameter SM_INDEX_END = 13,
parameter NUM_REQ = 4,
parameter BITS_PER_BANK = 3
) (
//INPUTS
input wire clk,
input wire reset,
@ -30,148 +28,144 @@ module VX_shared_memory
output wire[`NUM_THREADS-1:0] out_valid,
output wire[`NUM_THREADS-1:0][31:0] out_data,
output wire stall
);
//reg [NB:0][31:0] temp_address;
//reg [NB:0][31:0] temp_in_data;
//reg [NB:0] temp_in_valid;
reg [SM_BANKS - 1:0][31:0] temp_address;
reg [SM_BANKS - 1:0][31:0] temp_in_data;
reg [SM_BANKS - 1:0] temp_in_valid;
reg [`NUM_THREADS-1:0] temp_out_valid;
reg [`NUM_THREADS-1:0][31:0] temp_out_data;
//reg [NB:0][6:0] block_addr;
//reg [NB:0][3:0][31:0] block_wdata;
//reg [NB:0][3:0][31:0] block_rdata;
//reg [NB:0][1:0] block_we;
reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr;
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata;
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata;
reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
wire send_data;
//reg [NB:0][1:0] req_num;
reg [SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this
wire [`NUM_THREADS-1:0] orig_in_valid;
genvar f;
generate
for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup
assign orig_in_valid[f] = in_valid[f];
end
assign out_valid = send_data ? temp_out_valid : 0;
assign out_data = send_data ? temp_out_data : 0;
endgenerate
VX_priority_encoder_sm #(
.NB(SM_BANKS - 1),
.BITS_PER_BANK(BITS_PER_BANK),
.NUM_REQ(NUM_REQ)
) priority_encoder_sm (
.clk(clk),
.reset(reset),
.in_valid(orig_in_valid),
.in_address(in_address),
.in_data(in_data),
.out_valid(temp_in_valid),
.out_address(temp_address),
.out_data(temp_in_data),
.req_num(req_num),
.stall(stall),
.send_data(send_data)
);
//reg[NB:0][31:0] temp_address;
//reg[NB:0][31:0] temp_in_data;
//reg[NB:0] temp_in_valid;
reg[SM_BANKS - 1:0][31:0] temp_address;
reg[SM_BANKS - 1:0][31:0] temp_in_data;
reg[SM_BANKS - 1:0] temp_in_valid;
genvar j;
integer i;
generate
for (j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks
reg[`NUM_THREADS-1:0] temp_out_valid;
reg[`NUM_THREADS-1:0][31:0] temp_out_data;
wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j];
//reg [NB:0][6:0] block_addr;
//reg [NB:0][3:0][31:0] block_wdata;
//reg [NB:0][3:0][31:0] block_rdata;
//reg [NB:0][1:0] block_we;
reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr;
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata;
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata;
reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
VX_shared_memory_block #(
.SMB_HEIGHT(SM_HEIGHT),
.SMB_WORDS_PER_READ(SM_WORDS_PER_READ),
.SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ)
) shared_memory_block (
.clk (clk),
.reset (reset),
.addr (block_addr[j]),
.wdata (block_wdata[j]),
.we (block_we[j]),
.shm_write(shm_write),
.data_out (block_rdata[j])
);
end
wire send_data;
//reg[NB:0][1:0] req_num;
reg[SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this
wire [`NUM_THREADS-1:0] orig_in_valid;
genvar f;
generate
for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup
assign orig_in_valid[f] = in_valid[f];
end
assign out_valid = send_data ? temp_out_valid : 0;
assign out_data = send_data ? temp_out_data : 0;
endgenerate
//VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm(
.clk(clk),
.reset(reset),
.in_valid(orig_in_valid),
.in_address(in_address),
.in_data(in_data),
.out_valid(temp_in_valid),
.out_address(temp_address),
.out_data(temp_in_data),
.req_num(req_num),
.stall(stall),
.send_data(send_data)
);
genvar j;
integer i;
generate
//for(j=0; j<= NB; j=j+1) begin : sm_mem_block
for(j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks
wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j];
VX_shared_memory_block#
(
.SMB_HEIGHT(SM_HEIGHT),
.SMB_WORDS_PER_READ(SM_WORDS_PER_READ),
.SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ)
) vx_shared_memory_block
(
.clk (clk),
.reset (reset),
.addr (block_addr[j]),
.wdata (block_wdata[j]),
.we (block_we[j]),
.shm_write(shm_write),
.data_out (block_rdata[j])
);
end
always @(*) begin
block_addr = 0;
block_we = 0;
block_wdata = 0;
//for(i = 0; i <= NB; i = i+1) begin
for(i = 0; i <= SM_BANKS - 1; i = i+1) begin
if(temp_in_valid[i] == 1'b1) begin
//1. Check if the request is actually to the shared memory
if((temp_address[i][31:24]) == 8'hFF) begin
// STORES
if(mem_write != `NO_MEM_WRITE) begin
if(mem_write == `SB_MEM_WRITE) begin
//TODO
end
else if(mem_write == `SH_MEM_WRITE) begin
//TODO
end
else if(mem_write == `SW_MEM_WRITE) begin
//block_addr[i] = temp_address[i][13:7];
//block_we[i] = temp_address[i][6:5];
//block_wdata[i][temp_address[i][6:5]] = temp_in_data[i];
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START];
block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i];
end
end
//LOADS
else if(mem_read != `NO_MEM_READ) begin
if(mem_read == `LB_MEM_READ) begin
//TODO
end
else if (mem_read == `LH_MEM_READ)
begin
//TODO
end
else if (mem_read == `LW_MEM_READ)
begin
//block_addr[i] = temp_address[i][13:7];
//temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]];
//temp_out_valid[req_num[i]] = 1'b1;
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]];
temp_out_valid[req_num[i]] = 1'b1;
end
else if (mem_read == `LBU_MEM_READ)
begin
//TODO
end
else if (mem_read == `LHU_MEM_READ)
begin
//TODO
always @(*) begin
block_addr = 0;
block_we = 0;
block_wdata = 0;
//for(i = 0; i <= NB; i = i+1) begin
for (i = 0; i <= SM_BANKS - 1; i = i+1) begin
if (temp_in_valid[i] == 1'b1) begin
//1. Check if the request is actually to the shared memory
if ((temp_address[i][31:24]) == 8'hFF) begin
// STORES
if (mem_write != `NO_MEM_WRITE) begin
if (mem_write == `SB_MEM_WRITE) begin
//TODO
end
else if (mem_write == `SH_MEM_WRITE) begin
//TODO
end
else if (mem_write == `SW_MEM_WRITE) begin
//block_addr[i] = temp_address[i][13:7];
//block_we[i] = temp_address[i][6:5];
//block_wdata[i][temp_address[i][6:5]] = temp_in_data[i];
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START];
block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i];
end
end
//LOADS
else if(mem_read != `NO_MEM_READ) begin
if(mem_read == `LB_MEM_READ) begin
//TODO
end
else if (mem_read == `LH_MEM_READ)
begin
//TODO
end
else if (mem_read == `LW_MEM_READ)
begin
//block_addr[i] = temp_address[i][13:7];
//temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]];
//temp_out_valid[req_num[i]] = 1'b1;
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]];
temp_out_valid[req_num[i]] = 1'b1;
end
else if (mem_read == `LBU_MEM_READ)
begin
//TODO
end
else if (mem_read == `LHU_MEM_READ)
begin
//TODO
end
end
end
end
end
end
end
end
endgenerate
endgenerate
endmodule