mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
RTL code refactoring
This commit is contained in:
parent
3139d37610
commit
e9dfa828fe
33 changed files with 998 additions and 1022 deletions
|
@ -86,7 +86,7 @@ reg[31:0] io_data;
|
|||
|
||||
initial begin
|
||||
// $fdumpfile("vortex1.vcd");
|
||||
load_file("../../runtime/tests/simple/vx_simple_main.hex");
|
||||
load_file("../../runtime/tests/simple/simple_main_if.hex");
|
||||
$dumpvars(0, vortex_tb);
|
||||
reset = 1;
|
||||
clk = 0;
|
||||
|
|
|
@ -1,130 +1,127 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_back_end
|
||||
#(
|
||||
parameter CORE_ID = 0
|
||||
)
|
||||
(
|
||||
module VX_back_end #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_gpu_dcache_rsp_if vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_if vx_dcache_req,
|
||||
VX_gpu_dcache_rsp_if dcache_rsp_if,
|
||||
VX_gpu_dcache_req_if dcache_req_if,
|
||||
|
||||
output wire out_mem_delay,
|
||||
output wire out_exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_jal_response_if vx_jal_rsp,
|
||||
VX_branch_response_if vx_branch_rsp,
|
||||
output wire out_mem_delay,
|
||||
output wire out_exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
|
||||
VX_frE_to_bckE_req_if vx_bckE_req,
|
||||
VX_wb_if vx_writeback_if,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
VX_warp_ctl_if vx_warp_ctl
|
||||
VX_warp_ctl_if warp_ctl_if
|
||||
);
|
||||
|
||||
VX_wb_if writeback_temp_if();
|
||||
assign writeback_if.wb = writeback_temp_if.wb;
|
||||
assign writeback_if.rd = writeback_temp_if.rd;
|
||||
assign writeback_if.write_data = writeback_temp_if.write_data;
|
||||
assign writeback_if.wb_valid = writeback_temp_if.wb_valid;
|
||||
assign writeback_if.wb_warp_num = writeback_temp_if.wb_warp_num;
|
||||
assign writeback_if.wb_pc = writeback_temp_if.wb_pc;
|
||||
|
||||
VX_wb_if vx_writeback_temp();
|
||||
assign vx_writeback_if.wb = vx_writeback_temp.wb;
|
||||
assign vx_writeback_if.rd = vx_writeback_temp.rd;
|
||||
assign vx_writeback_if.write_data = vx_writeback_temp.write_data;
|
||||
assign vx_writeback_if.wb_valid = vx_writeback_temp.wb_valid;
|
||||
assign vx_writeback_if.wb_warp_num = vx_writeback_temp.wb_warp_num;
|
||||
assign vx_writeback_if.wb_pc = vx_writeback_temp.wb_pc;
|
||||
|
||||
// assign VX_writeback_if(vx_writeback_temp);
|
||||
// assign VX_writeback_if(writeback_temp_if);
|
||||
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_if vx_lsu_req();
|
||||
VX_inst_mem_wb_if vx_mem_wb();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_inst_mem_wb_if mem_wb_if();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_if vx_exec_unit_req();
|
||||
VX_inst_exec_wb_if vx_inst_exec_wb();
|
||||
VX_exec_unit_req_if exec_unit_req_if();
|
||||
VX_inst_exec_wb_if inst_exec_wb_if();
|
||||
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_if vx_gpu_inst_req();
|
||||
VX_gpu_inst_req_if gpu_inst_req_if();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_if vx_csr_req();
|
||||
VX_csr_wb_if vx_csr_wb();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_csr_wb_if csr_wb_if();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage vx_gpr_stage(
|
||||
VX_gpr_stage gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.vx_writeback_if(vx_writeback_temp),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.writeback_if (writeback_temp_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
// New
|
||||
.vx_exec_unit_req(vx_exec_unit_req),
|
||||
.vx_lsu_req (vx_lsu_req),
|
||||
.vx_gpu_inst_req (vx_gpu_inst_req),
|
||||
.vx_csr_req (vx_csr_req),
|
||||
.exec_unit_req_if(exec_unit_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (out_mem_delay),
|
||||
.exec_delay (out_exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
.memory_delay (out_mem_delay),
|
||||
.exec_delay (out_exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
VX_lsu load_store_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_lsu_req (vx_lsu_req),
|
||||
.vx_mem_wb (vx_mem_wb),
|
||||
.vx_dcache_rsp(vx_dcache_rsp),
|
||||
.vx_dcache_req(vx_dcache_req),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.dcache_rsp_if(dcache_rsp_if),
|
||||
.dcache_req_if(dcache_req_if),
|
||||
.out_delay (out_mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
|
||||
VX_execute_unit vx_execUnit (
|
||||
VX_execute_unit execUnit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_exec_unit_req(vx_exec_unit_req),
|
||||
.vx_inst_exec_wb (vx_inst_exec_wb),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.exec_unit_req_if(exec_unit_req_if),
|
||||
.inst_exec_wb_if (inst_exec_wb_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.out_delay (out_exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
);
|
||||
|
||||
VX_gpgpu_inst vx_gpgpu_inst (
|
||||
.vx_gpu_inst_req(vx_gpu_inst_req),
|
||||
.vx_warp_ctl (vx_warp_ctl)
|
||||
VX_gpgpu_inst gpgpu_inst (
|
||||
.gpu_inst_req_if(gpu_inst_req_if),
|
||||
.warp_ctl_if (warp_ctl_if)
|
||||
);
|
||||
|
||||
// VX_csr_wrapper vx_csr_wrapper(
|
||||
// .vx_csr_req(vx_csr_req),
|
||||
// .vx_csr_wb (vx_csr_wb)
|
||||
// );
|
||||
// VX_csr_wrapper csr_wrapper(
|
||||
// .csr_req_if(csr_req_if),
|
||||
// .csr_wb_if (csr_wb_if)
|
||||
// );
|
||||
|
||||
VX_csr_pipe #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) vx_csr_pipe (
|
||||
) csr_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.no_slot_csr (no_slot_csr),
|
||||
.vx_csr_req (vx_csr_req),
|
||||
.vx_writeback(vx_writeback_temp),
|
||||
.vx_csr_wb (vx_csr_wb),
|
||||
.csr_req_if (csr_req_if),
|
||||
.writeback_if(writeback_temp_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
.stall_gpr_csr(stall_gpr_csr)
|
||||
);
|
||||
|
||||
VX_writeback vx_wb (
|
||||
VX_writeback wb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_mem_wb (vx_mem_wb),
|
||||
.vx_inst_exec_wb (vx_inst_exec_wb),
|
||||
.vx_csr_wb (vx_csr_wb),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.inst_exec_wb_if (inst_exec_wb_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
|
||||
.vx_writeback_if(vx_writeback_temp),
|
||||
.writeback_if (writeback_temp_if),
|
||||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_exec (no_slot_exec),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
module VX_csr_handler (
|
||||
input wire clk,
|
||||
input wire[`CSR_ADDR_SIZE-1:0] in_decode_csr_address, // done
|
||||
VX_csr_write_request_if vx_csr_w_req,
|
||||
VX_csr_write_request_if csr_w_req_if,
|
||||
input wire in_wb_valid,
|
||||
output wire[31:0] out_decode_csr_data // done
|
||||
);
|
||||
|
@ -9,9 +9,9 @@ module VX_csr_handler (
|
|||
wire[`CSR_ADDR_SIZE-1:0] in_mem_csr_address;
|
||||
wire[31:0] in_mem_csr_result;
|
||||
|
||||
assign in_mem_is_csr = vx_csr_w_req.is_csr;
|
||||
assign in_mem_csr_address = vx_csr_w_req.csr_address;
|
||||
assign in_mem_csr_result = vx_csr_w_req.csr_result;
|
||||
assign in_mem_is_csr = csr_w_req_if.is_csr;
|
||||
assign in_mem_csr_address = csr_w_req_if.csr_address;
|
||||
assign in_mem_csr_result = csr_w_req_if.csr_result;
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr [`NUM_CSRS-1:0];
|
||||
|
||||
|
|
|
@ -3,13 +3,13 @@
|
|||
module VX_csr_pipe #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
input wire no_slot_csr,
|
||||
VX_csr_req_if vx_csr_req,
|
||||
VX_wb_if vx_writeback,
|
||||
VX_csr_wb_if vx_csr_wb,
|
||||
output wire stall_gpr_csr
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_csr,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_csr_wb_if csr_wb_if,
|
||||
output wire stall_gpr_csr
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid_s2;
|
||||
|
@ -24,16 +24,16 @@ module VX_csr_pipe #(
|
|||
wire[31:0] csr_read_data_unqual;
|
||||
wire[31:0] csr_read_data;
|
||||
|
||||
assign stall_gpr_csr = no_slot_csr && vx_csr_req.is_csr && |(vx_csr_req.valid);
|
||||
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);
|
||||
|
||||
assign csr_read_data = (csr_address_s2 == vx_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
wire writeback = |vx_writeback.wb_valid;
|
||||
wire writeback = |writeback_if.wb_valid;
|
||||
|
||||
VX_csr_data vx_csr_data(
|
||||
VX_csr_data csr_data(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_read_csr_address (vx_csr_req.csr_address),
|
||||
.in_read_csr_address (csr_req_if.csr_address),
|
||||
.in_write_valid (is_csr_s2),
|
||||
.in_write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.in_write_csr_address(csr_address_s2),
|
||||
|
@ -44,10 +44,10 @@ module VX_csr_pipe #(
|
|||
reg [31:0] csr_updated_data;
|
||||
|
||||
always @(*) begin
|
||||
case (vx_csr_req.alu_op)
|
||||
`CSR_ALU_RW: csr_updated_data = vx_csr_req.csr_mask;
|
||||
`CSR_ALU_RS: csr_updated_data = csr_read_data | vx_csr_req.csr_mask;
|
||||
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - vx_csr_req.csr_mask);
|
||||
case (csr_req_if.alu_op)
|
||||
`CSR_ALU_RW: csr_updated_data = csr_req_if.csr_mask;
|
||||
`CSR_ALU_RS: csr_updated_data = csr_read_data | csr_req_if.csr_mask;
|
||||
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_req_if.csr_mask);
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
@ -61,7 +61,7 @@ module VX_csr_pipe #(
|
|||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
.flush(zero),
|
||||
.in ({vx_csr_req.valid, vx_csr_req.warp_num, vx_csr_req.rd, vx_csr_req.wb, vx_csr_req.is_csr, vx_csr_req.csr_address, csr_read_data , csr_updated_data }),
|
||||
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }),
|
||||
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
|
||||
|
@ -97,10 +97,10 @@ module VX_csr_pipe #(
|
|||
warp_id_select ? warp_idz :
|
||||
csr_vec_read_data_s2;
|
||||
|
||||
assign vx_csr_wb.valid = valid_s2;
|
||||
assign vx_csr_wb.warp_num = warp_num_s2;
|
||||
assign vx_csr_wb.rd = rd_s2;
|
||||
assign vx_csr_wb.wb = wb_s2;
|
||||
assign vx_csr_wb.csr_result = final_csr_data;
|
||||
assign csr_wb_if.valid = valid_s2;
|
||||
assign csr_wb_if.warp_num = warp_num_s2;
|
||||
assign csr_wb_if.rd = rd_s2;
|
||||
assign csr_wb_if.wb = wb_s2;
|
||||
assign csr_wb_if.csr_result = final_csr_data;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_wrapper (
|
||||
VX_csr_req_if vx_csr_req,
|
||||
VX_csr_wb_if vx_csr_wb
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_csr_wb_if csr_wb_if
|
||||
);
|
||||
|
||||
|
||||
|
@ -17,21 +17,21 @@ module VX_csr_wrapper (
|
|||
end
|
||||
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, vx_csr_req.warp_num};
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
assign vx_csr_wb.valid = vx_csr_req.valid;
|
||||
assign vx_csr_wb.warp_num = vx_csr_req.warp_num;
|
||||
assign vx_csr_wb.rd = vx_csr_req.rd;
|
||||
assign vx_csr_wb.wb = vx_csr_req.wb;
|
||||
assign csr_wb_if.valid = csr_req_if.valid;
|
||||
assign csr_wb_if.warp_num = csr_req_if.warp_num;
|
||||
assign csr_wb_if.rd = csr_req_if.rd;
|
||||
assign csr_wb_if.wb = csr_req_if.wb;
|
||||
|
||||
|
||||
wire thread_select = vx_csr_req.csr_address == 12'h20;
|
||||
wire warp_select = vx_csr_req.csr_address == 12'h21;
|
||||
wire thread_select = csr_req_if.csr_address == 12'h20;
|
||||
wire warp_select = csr_req_if.csr_address == 12'h21;
|
||||
|
||||
assign vx_csr_wb.csr_result = thread_select ? thread_ids :
|
||||
assign csr_wb_if.csr_result = thread_select ? thread_ids :
|
||||
warp_select ? warp_ids :
|
||||
0;
|
||||
|
||||
|
|
|
@ -3,22 +3,22 @@
|
|||
|
||||
module VX_decode(
|
||||
// Fetch Inputs
|
||||
VX_inst_meta_if fd_inst_meta_de,
|
||||
VX_inst_meta_if fd_inst_meta_de,
|
||||
|
||||
// Outputs
|
||||
VX_frE_to_bckE_req_if vx_frE_to_bckE_req,
|
||||
VX_wstall_if vx_wstall,
|
||||
VX_join_if vx_join,
|
||||
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
|
||||
output wire terminate_sim
|
||||
output wire terminate_sim
|
||||
|
||||
);
|
||||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign vx_frE_to_bckE_req.curr_PC = in_curr_PC;
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
|
@ -84,20 +84,20 @@ module VX_decode(
|
|||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
|
||||
assign vx_frE_to_bckE_req.valid = fd_inst_meta_de.valid;
|
||||
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
|
||||
|
||||
assign vx_frE_to_bckE_req.warp_num = in_warp_num;
|
||||
assign frE_to_bckE_req_if.warp_num = in_warp_num;
|
||||
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
|
||||
assign vx_frE_to_bckE_req.rd = in_instruction[11:7];
|
||||
assign vx_frE_to_bckE_req.rs1 = in_instruction[19:15];
|
||||
assign vx_frE_to_bckE_req.rs2 = in_instruction[24:20];
|
||||
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
|
||||
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
|
||||
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
assign vx_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4;
|
||||
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
|
||||
|
||||
// Write Back signal
|
||||
assign is_rtype = (curr_opcode == `R_INST);
|
||||
|
@ -123,43 +123,43 @@ module VX_decode(
|
|||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
|
||||
assign vx_join.is_join = is_join;
|
||||
assign vx_join.join_warp_num = in_warp_num;
|
||||
assign join_if.is_join = is_join;
|
||||
assign join_if.join_warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign vx_frE_to_bckE_req.is_wspawn = is_wspawn;
|
||||
assign vx_frE_to_bckE_req.is_tmc = is_tmc;
|
||||
assign vx_frE_to_bckE_req.is_split = is_split;
|
||||
assign vx_frE_to_bckE_req.is_barrier = is_barrier;
|
||||
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
|
||||
assign frE_to_bckE_req_if.is_tmc = is_tmc;
|
||||
assign frE_to_bckE_req_if.is_split = is_split;
|
||||
assign frE_to_bckE_req_if.is_barrier = is_barrier;
|
||||
|
||||
|
||||
|
||||
assign vx_frE_to_bckE_req.csr_immed = is_csr_immed;
|
||||
assign vx_frE_to_bckE_req.is_csr = is_csr;
|
||||
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
||||
assign frE_to_bckE_req_if.is_csr = is_csr;
|
||||
|
||||
|
||||
assign vx_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
|
||||
|
||||
assign vx_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
// MEM signals
|
||||
assign vx_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
|
||||
assign vx_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
|
||||
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
|
||||
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
|
||||
|
||||
// UPPER IMMEDIATE
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`LUI_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3};
|
||||
`LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
endcase // curr_opcode
|
||||
end
|
||||
|
||||
assign vx_frE_to_bckE_req.upper_immed = temp_upper_immed;
|
||||
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
|
||||
|
||||
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
|
@ -171,7 +171,7 @@ module VX_decode(
|
|||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
|
||||
|
||||
assign jalr_immed = {func7, vx_frE_to_bckE_req.rs2};
|
||||
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
|
||||
|
@ -208,16 +208,16 @@ module VX_decode(
|
|||
endcase
|
||||
end
|
||||
|
||||
assign vx_frE_to_bckE_req.jalQual = is_jal;
|
||||
assign vx_frE_to_bckE_req.jal = temp_jal;
|
||||
assign vx_frE_to_bckE_req.jal_offset = temp_jal_offset;
|
||||
assign frE_to_bckE_req_if.jalQual = is_jal;
|
||||
assign frE_to_bckE_req_if.jal = temp_jal;
|
||||
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
|
||||
|
||||
// wire is_ebreak;
|
||||
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign vx_frE_to_bckE_req.ebreak = ebreak;
|
||||
assign frE_to_bckE_req_if.ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
|
||||
|
||||
|
@ -226,26 +226,26 @@ module VX_decode(
|
|||
assign csr_cond1 = func3 != 3'h0;
|
||||
assign csr_cond2 = u_12 >= 12'h2;
|
||||
|
||||
assign vx_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
|
||||
|
||||
// ITYPE IMMED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, vx_frE_to_bckE_req.rs2};
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, vx_frE_to_bckE_req.rd};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
|
||||
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign vx_frE_to_bckE_req.itype_immed = temp_itype_immed;
|
||||
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
|
@ -282,10 +282,10 @@ module VX_decode(
|
|||
endcase
|
||||
end
|
||||
|
||||
assign vx_frE_to_bckE_req.branch_type = temp_branch_type;
|
||||
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
|
||||
|
||||
assign vx_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign vx_wstall.warp_num = in_warp_num;
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign wstall_if.warp_num = in_warp_num;
|
||||
|
||||
always @(*) begin
|
||||
// ALU OP
|
||||
|
@ -330,14 +330,14 @@ module VX_decode(
|
|||
|
||||
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((vx_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
|
||||
assign vx_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
|
@ -5,69 +5,69 @@ module VX_dmem_controller (
|
|||
input wire reset,
|
||||
|
||||
// Dram <-> Dcache
|
||||
VX_gpu_dcache_dram_req_if vx_gpu_dcache_dram_req,
|
||||
VX_gpu_dcache_dram_rsp_if vx_gpu_dcache_dram_res,
|
||||
VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req,
|
||||
VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if,
|
||||
VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_res_if,
|
||||
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if,
|
||||
|
||||
// Dram <-> Icache
|
||||
VX_gpu_dcache_dram_req_if vx_gpu_icache_dram_req,
|
||||
VX_gpu_dcache_dram_rsp_if vx_gpu_icache_dram_res,
|
||||
VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req,
|
||||
VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if,
|
||||
VX_gpu_dcache_dram_rsp_if gpu_icache_dram_res_if,
|
||||
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if,
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_gpu_dcache_rsp_if vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_if vx_dcache_req,
|
||||
VX_gpu_dcache_rsp_if dcache_rsp_if,
|
||||
VX_gpu_dcache_req_if dcache_req_if,
|
||||
|
||||
// Core <-> Icache
|
||||
VX_gpu_dcache_rsp_if vx_icache_rsp,
|
||||
VX_gpu_dcache_req_if vx_icache_req
|
||||
VX_gpu_dcache_rsp_if icache_rsp_if,
|
||||
VX_gpu_dcache_req_if icache_req_if
|
||||
);
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_smem();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_smem();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_dcache();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_dcache();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
|
||||
|
||||
wire to_shm = vx_dcache_req.core_req_addr[0][31:24] == 8'hFF;
|
||||
wire dcache_wants_wb = (|vx_dcache_rsp_dcache.core_wb_valid);
|
||||
wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF;
|
||||
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_wb_valid);
|
||||
|
||||
// Dcache Request
|
||||
assign vx_dcache_req_dcache.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign vx_dcache_req_dcache.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_dcache.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_dcache.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_dcache.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_dcache.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_dcache.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_dcache.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_dcache.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_dcache.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
|
||||
assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_dcache_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_dcache_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
|
||||
assign dcache_req_dcache_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
|
||||
assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_dcache_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
// Shared Memory Request
|
||||
assign vx_dcache_req_smem.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign vx_dcache_req_smem.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_smem.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_smem.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_smem.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_smem.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_smem.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_smem.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_smem.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_smem.core_no_wb_slot = vx_dcache_req.core_no_wb_slot || dcache_wants_wb;
|
||||
assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_smem_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_smem_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
|
||||
assign dcache_req_smem_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
|
||||
assign dcache_req_smem_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_smem_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_smem_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot || dcache_wants_wb;
|
||||
|
||||
// Dcache Response
|
||||
assign vx_dcache_rsp.core_wb_valid = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_valid : vx_dcache_rsp_smem.core_wb_valid;
|
||||
assign vx_dcache_rsp.core_wb_req_rd = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_rd : vx_dcache_rsp_smem.core_wb_req_rd;
|
||||
assign vx_dcache_rsp.core_wb_req_wb = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_wb : vx_dcache_rsp_smem.core_wb_req_wb;
|
||||
assign vx_dcache_rsp.core_wb_warp_num = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_warp_num : vx_dcache_rsp_smem.core_wb_warp_num;
|
||||
assign vx_dcache_rsp.core_wb_readdata = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_readdata : vx_dcache_rsp_smem.core_wb_readdata;
|
||||
assign vx_dcache_rsp.core_wb_pc = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_pc : vx_dcache_rsp_smem.core_wb_pc;
|
||||
assign dcache_rsp_if.core_wb_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_valid : dcache_rsp_smem_if.core_wb_valid;
|
||||
assign dcache_rsp_if.core_wb_req_rd = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_rd : dcache_rsp_smem_if.core_wb_req_rd;
|
||||
assign dcache_rsp_if.core_wb_req_wb = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_wb : dcache_rsp_smem_if.core_wb_req_wb;
|
||||
assign dcache_rsp_if.core_wb_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_warp_num : dcache_rsp_smem_if.core_wb_warp_num;
|
||||
assign dcache_rsp_if.core_wb_readdata = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_readdata : dcache_rsp_smem_if.core_wb_readdata;
|
||||
assign dcache_rsp_if.core_wb_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_pc : dcache_rsp_smem_if.core_wb_pc;
|
||||
|
||||
assign vx_dcache_rsp.delay_req = to_shm ? vx_dcache_rsp_smem.delay_req : vx_dcache_rsp_dcache.delay_req;
|
||||
assign dcache_rsp_if.delay_req = to_shm ? dcache_rsp_smem_if.delay_req : dcache_rsp_dcache_if.delay_req;
|
||||
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_res();
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if();
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES),
|
||||
|
@ -95,46 +95,46 @@ module VX_dmem_controller (
|
|||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (vx_dcache_req_smem.core_req_valid),
|
||||
.core_req_mem_read (vx_dcache_req_smem.core_req_mem_read),
|
||||
.core_req_mem_write(vx_dcache_req_smem.core_req_mem_write),
|
||||
.core_req_addr (vx_dcache_req_smem.core_req_addr),
|
||||
.core_req_writedata(vx_dcache_req_smem.core_req_writedata),
|
||||
.core_req_rd (vx_dcache_req_smem.core_req_rd),
|
||||
.core_req_wb (vx_dcache_req_smem.core_req_wb),
|
||||
.core_req_warp_num (vx_dcache_req_smem.core_req_warp_num),
|
||||
.core_req_pc (vx_dcache_req_smem.core_req_pc),
|
||||
.core_req_valid (dcache_req_smem_if.core_req_valid),
|
||||
.core_req_mem_read (dcache_req_smem_if.core_req_mem_read),
|
||||
.core_req_mem_write(dcache_req_smem_if.core_req_mem_write),
|
||||
.core_req_addr (dcache_req_smem_if.core_req_addr),
|
||||
.core_req_writedata(dcache_req_smem_if.core_req_writedata),
|
||||
.core_req_rd (dcache_req_smem_if.core_req_rd),
|
||||
.core_req_wb (dcache_req_smem_if.core_req_wb),
|
||||
.core_req_warp_num (dcache_req_smem_if.core_req_warp_num),
|
||||
.core_req_pc (dcache_req_smem_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (vx_dcache_rsp_smem.delay_req),
|
||||
.delay_req (dcache_rsp_smem_if.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (vx_dcache_req_smem.core_no_wb_slot),
|
||||
.core_no_wb_slot (dcache_req_smem_if.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (vx_dcache_rsp_smem.core_wb_valid),
|
||||
.core_wb_req_rd (vx_dcache_rsp_smem.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_dcache_rsp_smem.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_dcache_rsp_smem.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_dcache_rsp_smem.core_wb_readdata),
|
||||
.core_wb_pc (vx_dcache_rsp_smem.core_wb_pc),
|
||||
.core_wb_valid (dcache_rsp_smem_if.core_wb_valid),
|
||||
.core_wb_req_rd (dcache_rsp_smem_if.core_wb_req_rd),
|
||||
.core_wb_req_wb (dcache_rsp_smem_if.core_wb_req_wb),
|
||||
.core_wb_warp_num (dcache_rsp_smem_if.core_wb_warp_num),
|
||||
.core_wb_readdata (dcache_rsp_smem_if.core_wb_readdata),
|
||||
.core_wb_pc (dcache_rsp_smem_if.core_wb_pc),
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
.core_wb_address (),
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (vx_gpu_smem_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_smem_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_smem_dram_res.dram_rsp_data),
|
||||
.dram_rsp_valid (gpu_smem_dram_res_if.dram_rsp_valid),
|
||||
.dram_rsp_addr (gpu_smem_dram_res_if.dram_rsp_addr),
|
||||
.dram_rsp_data (gpu_smem_dram_res_if.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_rsp_ready (vx_gpu_smem_dram_req.dram_rsp_ready),
|
||||
.dram_rsp_ready (gpu_smem_dram_req_if.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_read (vx_gpu_smem_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_smem_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_smem_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_smem_dram_req.dram_req_data),
|
||||
.dram_req_read (gpu_smem_dram_req_if.dram_req_read),
|
||||
.dram_req_write (gpu_smem_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_smem_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_smem_dram_req_if.dram_req_data),
|
||||
.dram_req_full (1),
|
||||
|
||||
// Snoop Request
|
||||
|
@ -178,52 +178,52 @@ module VX_dmem_controller (
|
|||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (vx_dcache_req_dcache.core_req_valid),
|
||||
.core_req_mem_read (vx_dcache_req_dcache.core_req_mem_read),
|
||||
.core_req_mem_write(vx_dcache_req_dcache.core_req_mem_write),
|
||||
.core_req_addr (vx_dcache_req_dcache.core_req_addr),
|
||||
.core_req_writedata(vx_dcache_req_dcache.core_req_writedata),
|
||||
.core_req_rd (vx_dcache_req_dcache.core_req_rd),
|
||||
.core_req_wb (vx_dcache_req_dcache.core_req_wb),
|
||||
.core_req_warp_num (vx_dcache_req_dcache.core_req_warp_num),
|
||||
.core_req_pc (vx_dcache_req_dcache.core_req_pc),
|
||||
.core_req_valid (dcache_req_dcache_if.core_req_valid),
|
||||
.core_req_mem_read (dcache_req_dcache_if.core_req_mem_read),
|
||||
.core_req_mem_write(dcache_req_dcache_if.core_req_mem_write),
|
||||
.core_req_addr (dcache_req_dcache_if.core_req_addr),
|
||||
.core_req_writedata(dcache_req_dcache_if.core_req_writedata),
|
||||
.core_req_rd (dcache_req_dcache_if.core_req_rd),
|
||||
.core_req_wb (dcache_req_dcache_if.core_req_wb),
|
||||
.core_req_warp_num (dcache_req_dcache_if.core_req_warp_num),
|
||||
.core_req_pc (dcache_req_dcache_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (vx_dcache_rsp_dcache.delay_req),
|
||||
.delay_req (dcache_rsp_dcache_if.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (vx_dcache_req_dcache.core_no_wb_slot),
|
||||
.core_no_wb_slot (dcache_req_dcache_if.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (vx_dcache_rsp_dcache.core_wb_valid),
|
||||
.core_wb_req_rd (vx_dcache_rsp_dcache.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_dcache_rsp_dcache.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_dcache_rsp_dcache.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_dcache_rsp_dcache.core_wb_readdata),
|
||||
.core_wb_pc (vx_dcache_rsp_dcache.core_wb_pc),
|
||||
.core_wb_valid (dcache_rsp_dcache_if.core_wb_valid),
|
||||
.core_wb_req_rd (dcache_rsp_dcache_if.core_wb_req_rd),
|
||||
.core_wb_req_wb (dcache_rsp_dcache_if.core_wb_req_wb),
|
||||
.core_wb_warp_num (dcache_rsp_dcache_if.core_wb_warp_num),
|
||||
.core_wb_readdata (dcache_rsp_dcache_if.core_wb_readdata),
|
||||
.core_wb_pc (dcache_rsp_dcache_if.core_wb_pc),
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
.core_wb_address (),
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (vx_gpu_dcache_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_dcache_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_dcache_dram_res.dram_rsp_data),
|
||||
.dram_rsp_valid (gpu_dcache_dram_res_if.dram_rsp_valid),
|
||||
.dram_rsp_addr (gpu_dcache_dram_res_if.dram_rsp_addr),
|
||||
.dram_rsp_data (gpu_dcache_dram_res_if.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_rsp_ready (vx_gpu_dcache_dram_req.dram_rsp_ready),
|
||||
.dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_read (vx_gpu_dcache_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_dcache_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_dcache_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_dcache_dram_req.dram_req_data),
|
||||
.dram_req_full (vx_gpu_dcache_dram_req.dram_req_full),
|
||||
.dram_req_read (gpu_dcache_dram_req_if.dram_req_read),
|
||||
.dram_req_write (gpu_dcache_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_dcache_dram_req_if.dram_req_data),
|
||||
.dram_req_full (gpu_dcache_dram_req_if.dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (vx_gpu_dcache_snp_req.snp_req_valid),
|
||||
.snp_req_addr (vx_gpu_dcache_snp_req.snp_req_addr),
|
||||
.snp_req_full (vx_gpu_dcache_snp_req.snp_req_full),
|
||||
.snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid),
|
||||
.snp_req_addr (gpu_dcache_snp_req_if.snp_req_addr),
|
||||
.snp_req_full (gpu_dcache_snp_req_if.snp_req_full),
|
||||
|
||||
// Snoop Forward
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
@ -259,52 +259,52 @@ module VX_dmem_controller (
|
|||
.reset (reset),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (vx_icache_req.core_req_valid),
|
||||
.core_req_mem_read (vx_icache_req.core_req_mem_read),
|
||||
.core_req_mem_write (vx_icache_req.core_req_mem_write),
|
||||
.core_req_addr (vx_icache_req.core_req_addr),
|
||||
.core_req_writedata (vx_icache_req.core_req_writedata),
|
||||
.core_req_rd (vx_icache_req.core_req_rd),
|
||||
.core_req_wb (vx_icache_req.core_req_wb),
|
||||
.core_req_warp_num (vx_icache_req.core_req_warp_num),
|
||||
.core_req_pc (vx_icache_req.core_req_pc),
|
||||
.core_req_valid (icache_req_if.core_req_valid),
|
||||
.core_req_mem_read (icache_req_if.core_req_mem_read),
|
||||
.core_req_mem_write (icache_req_if.core_req_mem_write),
|
||||
.core_req_addr (icache_req_if.core_req_addr),
|
||||
.core_req_writedata (icache_req_if.core_req_writedata),
|
||||
.core_req_rd (icache_req_if.core_req_rd),
|
||||
.core_req_wb (icache_req_if.core_req_wb),
|
||||
.core_req_warp_num (icache_req_if.core_req_warp_num),
|
||||
.core_req_pc (icache_req_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (vx_icache_rsp.delay_req),
|
||||
.delay_req (icache_rsp_if.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (vx_icache_req.core_no_wb_slot),
|
||||
.core_no_wb_slot (icache_req_if.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (vx_icache_rsp.core_wb_valid),
|
||||
.core_wb_req_rd (vx_icache_rsp.core_wb_req_rd),
|
||||
.core_wb_req_wb (vx_icache_rsp.core_wb_req_wb),
|
||||
.core_wb_warp_num (vx_icache_rsp.core_wb_warp_num),
|
||||
.core_wb_readdata (vx_icache_rsp.core_wb_readdata),
|
||||
.core_wb_pc (vx_icache_rsp.core_wb_pc),
|
||||
.core_wb_valid (icache_rsp_if.core_wb_valid),
|
||||
.core_wb_req_rd (icache_rsp_if.core_wb_req_rd),
|
||||
.core_wb_req_wb (icache_rsp_if.core_wb_req_wb),
|
||||
.core_wb_warp_num (icache_rsp_if.core_wb_warp_num),
|
||||
.core_wb_readdata (icache_rsp_if.core_wb_readdata),
|
||||
.core_wb_pc (icache_rsp_if.core_wb_pc),
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
.core_wb_address (),
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (vx_gpu_icache_dram_res.dram_rsp_valid),
|
||||
.dram_rsp_addr (vx_gpu_icache_dram_res.dram_rsp_addr),
|
||||
.dram_rsp_data (vx_gpu_icache_dram_res.dram_rsp_data),
|
||||
.dram_rsp_valid (gpu_icache_dram_res_if.dram_rsp_valid),
|
||||
.dram_rsp_addr (gpu_icache_dram_res_if.dram_rsp_addr),
|
||||
.dram_rsp_data (gpu_icache_dram_res_if.dram_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_rsp_ready (vx_gpu_icache_dram_req.dram_rsp_ready),
|
||||
.dram_rsp_ready (gpu_icache_dram_req_if.dram_rsp_ready),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req_read (vx_gpu_icache_dram_req.dram_req_read),
|
||||
.dram_req_write (vx_gpu_icache_dram_req.dram_req_write),
|
||||
.dram_req_addr (vx_gpu_icache_dram_req.dram_req_addr),
|
||||
.dram_req_data (vx_gpu_icache_dram_req.dram_req_data),
|
||||
.dram_req_full (vx_gpu_icache_dram_req.dram_req_full),
|
||||
.dram_req_read (gpu_icache_dram_req_if.dram_req_read),
|
||||
.dram_req_write (gpu_icache_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_icache_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_icache_dram_req_if.dram_req_data),
|
||||
.dram_req_full (gpu_icache_dram_req_if.dram_req_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (vx_gpu_icache_snp_req.snp_req_valid),
|
||||
.snp_req_addr (vx_gpu_icache_snp_req.snp_req_addr),
|
||||
.snp_req_full (vx_gpu_icache_snp_req.snp_req_full),
|
||||
.snp_req_valid (gpu_icache_snp_req_if.snp_req_valid),
|
||||
.snp_req_addr (gpu_icache_snp_req_if.snp_req_addr),
|
||||
.snp_req_full (gpu_icache_snp_req_if.snp_req_full),
|
||||
|
||||
// Snoop Forward
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute_unit (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Request
|
||||
VX_exec_unit_req_if vx_exec_unit_req,
|
||||
VX_exec_unit_req_if exec_unit_req_if,
|
||||
|
||||
// Output
|
||||
// Writeback
|
||||
VX_inst_exec_wb_if vx_inst_exec_wb,
|
||||
VX_inst_exec_wb_if inst_exec_wb_if,
|
||||
// JAL Response
|
||||
VX_jal_response_if vx_jal_rsp,
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
// Branch Response
|
||||
VX_branch_response_if vx_branch_rsp,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
|
||||
input wire no_slot_exec,
|
||||
output wire out_delay
|
||||
input wire no_slot_exec,
|
||||
output wire out_delay
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
|
||||
|
@ -31,23 +31,23 @@ module VX_execute_unit (
|
|||
wire[31:0] in_jal_offset;
|
||||
wire[31:0] in_curr_PC;
|
||||
|
||||
assign in_a_reg_data = vx_exec_unit_req.a_reg_data;
|
||||
assign in_b_reg_data = vx_exec_unit_req.b_reg_data;
|
||||
assign in_alu_op = vx_exec_unit_req.alu_op;
|
||||
assign in_rs2_src = vx_exec_unit_req.rs2_src;
|
||||
assign in_itype_immed = vx_exec_unit_req.itype_immed;
|
||||
assign in_branch_type = vx_exec_unit_req.branch_type;
|
||||
assign in_upper_immed = vx_exec_unit_req.upper_immed;
|
||||
assign in_jal = vx_exec_unit_req.jal;
|
||||
assign in_jal_offset = vx_exec_unit_req.jal_offset;
|
||||
assign in_curr_PC = vx_exec_unit_req.curr_PC;
|
||||
assign in_a_reg_data = exec_unit_req_if.a_reg_data;
|
||||
assign in_b_reg_data = exec_unit_req_if.b_reg_data;
|
||||
assign in_alu_op = exec_unit_req_if.alu_op;
|
||||
assign in_rs2_src = exec_unit_req_if.rs2_src;
|
||||
assign in_itype_immed = exec_unit_req_if.itype_immed;
|
||||
assign in_branch_type = exec_unit_req_if.branch_type;
|
||||
assign in_upper_immed = exec_unit_req_if.upper_immed;
|
||||
assign in_jal = exec_unit_req_if.jal;
|
||||
assign in_jal_offset = exec_unit_req_if.jal_offset;
|
||||
assign in_curr_PC = exec_unit_req_if.curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0] alu_stall;
|
||||
genvar index_out_reg;
|
||||
generate
|
||||
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
|
||||
VX_alu vx_alu(
|
||||
VX_alu alu(
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
// .in_reg_data (in_reg_data[1:0]),
|
||||
|
@ -77,17 +77,17 @@ module VX_execute_unit (
|
|||
VX_generic_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) choose_alu_result (
|
||||
.valids(vx_exec_unit_req.valid),
|
||||
.valids(exec_unit_req_if.valid),
|
||||
.index (jal_branch_use_index),
|
||||
.found (jal_branch_found_valid)
|
||||
);
|
||||
);
|
||||
|
||||
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
|
||||
|
||||
reg temp_branch_dir;
|
||||
always @(*)
|
||||
begin
|
||||
case (vx_exec_unit_req.branch_type)
|
||||
case (exec_unit_req_if.branch_type)
|
||||
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
|
||||
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
|
||||
|
@ -104,35 +104,35 @@ module VX_execute_unit (
|
|||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
|
||||
assign duplicate_PC_data[i] = vx_exec_unit_req.PC_next;
|
||||
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
// VX_inst_exec_wb_if vx_inst_exec_wb_temp();
|
||||
// VX_inst_exec_wb_if inst_exec_wb_temp_if();
|
||||
// JAL Response
|
||||
VX_jal_response_if vx_jal_rsp_temp();
|
||||
VX_jal_response_if jal_rsp_temp_if();
|
||||
// Branch Response
|
||||
VX_branch_response_if vx_branch_rsp_temp();
|
||||
VX_branch_response_if branch_rsp_temp_if();
|
||||
|
||||
// Actual Writeback
|
||||
assign vx_inst_exec_wb.rd = vx_exec_unit_req.rd;
|
||||
assign vx_inst_exec_wb.wb = vx_exec_unit_req.wb;
|
||||
assign vx_inst_exec_wb.wb_valid = vx_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
|
||||
assign vx_inst_exec_wb.wb_warp_num = vx_exec_unit_req.warp_num;
|
||||
assign vx_inst_exec_wb.alu_result = vx_exec_unit_req.jal ? duplicate_PC_data : alu_result;
|
||||
assign inst_exec_wb_if.rd = exec_unit_req_if.rd;
|
||||
assign inst_exec_wb_if.wb = exec_unit_req_if.wb;
|
||||
assign inst_exec_wb_if.wb_valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
|
||||
assign inst_exec_wb_if.wb_warp_num = exec_unit_req_if.warp_num;
|
||||
assign inst_exec_wb_if.alu_result = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;
|
||||
|
||||
assign vx_inst_exec_wb.exec_wb_pc = in_curr_PC;
|
||||
assign inst_exec_wb_if.exec_wb_pc = in_curr_PC;
|
||||
// Jal rsp
|
||||
assign vx_jal_rsp_temp.jal = in_jal;
|
||||
assign vx_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
|
||||
assign vx_jal_rsp_temp.jal_warp_num = vx_exec_unit_req.warp_num;
|
||||
assign jal_rsp_temp_if.jal = in_jal;
|
||||
assign jal_rsp_temp_if.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
|
||||
assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num;
|
||||
|
||||
// Branch rsp
|
||||
assign vx_branch_rsp_temp.valid_branch = (vx_exec_unit_req.branch_type != `NO_BRANCH) && (|vx_exec_unit_req.valid);
|
||||
assign vx_branch_rsp_temp.branch_dir = temp_branch_dir;
|
||||
assign vx_branch_rsp_temp.branch_warp_num = vx_exec_unit_req.warp_num;
|
||||
assign vx_branch_rsp_temp.branch_dest = $signed(vx_exec_unit_req.curr_PC) + ($signed(vx_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
|
||||
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid);
|
||||
assign branch_rsp_temp_if.branch_dir = temp_branch_dir;
|
||||
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
|
||||
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
|
||||
|
||||
|
||||
wire zero = 0;
|
||||
|
@ -142,8 +142,8 @@ module VX_execute_unit (
|
|||
// .reset(reset),
|
||||
// .stall(zero),
|
||||
// .flush(zero),
|
||||
// .in ({vx_inst_exec_wb_temp.rd, vx_inst_exec_wb_temp.wb, vx_inst_exec_wb_temp.wb_valid, vx_inst_exec_wb_temp.wb_warp_num, vx_inst_exec_wb_temp.alu_result, vx_inst_exec_wb_temp.exec_wb_pc}),
|
||||
// .out ({vx_inst_exec_wb.rd , vx_inst_exec_wb.wb , vx_inst_exec_wb.wb_valid , vx_inst_exec_wb.wb_warp_num , vx_inst_exec_wb.alu_result , vx_inst_exec_wb.exec_wb_pc })
|
||||
// .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}),
|
||||
// .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc })
|
||||
// );
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -153,8 +153,8 @@ module VX_execute_unit (
|
|||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({vx_jal_rsp_temp.jal, vx_jal_rsp_temp.jal_dest, vx_jal_rsp_temp.jal_warp_num}),
|
||||
.out ({vx_jal_rsp.jal , vx_jal_rsp.jal_dest , vx_jal_rsp.jal_warp_num})
|
||||
.in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}),
|
||||
.out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -164,8 +164,8 @@ module VX_execute_unit (
|
|||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({vx_branch_rsp_temp.valid_branch, vx_branch_rsp_temp.branch_dir, vx_branch_rsp_temp.branch_warp_num, vx_branch_rsp_temp.branch_dest}),
|
||||
.out ({vx_branch_rsp.valid_branch , vx_branch_rsp.branch_dir , vx_branch_rsp.branch_warp_num , vx_branch_rsp.branch_dest })
|
||||
.in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}),
|
||||
.out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest })
|
||||
);
|
||||
|
||||
// always @(*) begin
|
||||
|
@ -178,7 +178,7 @@ module VX_execute_unit (
|
|||
|
||||
// end
|
||||
|
||||
// assign out_is_csr = vx_exec_unit_req.is_csr;
|
||||
// assign out_csr_address = vx_exec_unit_req.csr_address;
|
||||
// assign out_is_csr = exec_unit_req_if.is_csr;
|
||||
// assign out_csr_address = exec_unit_req_if.csr_address;
|
||||
|
||||
endmodule : VX_execute_unit
|
|
@ -1,20 +1,20 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_wstall_if vx_wstall,
|
||||
VX_join_if vx_join,
|
||||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
|
||||
output wire out_ebreak,
|
||||
VX_jal_response_if vx_jal_rsp,
|
||||
VX_branch_response_if vx_branch_rsp,
|
||||
output wire out_ebreak,
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_warp_ctl_if vx_warp_ctl
|
||||
VX_warp_ctl_if warp_ctl_if
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
|
@ -22,15 +22,12 @@ module VX_fetch (
|
|||
wire[31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
|
||||
|
||||
wire pipe_stall;
|
||||
|
||||
|
||||
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
|
||||
|
||||
// Locals
|
||||
|
||||
|
||||
assign pipe_stall = schedule_delay || icache_stage_delay;
|
||||
|
||||
VX_warp_scheduler warp_scheduler(
|
||||
|
@ -38,52 +35,52 @@ module VX_fetch (
|
|||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
|
||||
.is_barrier (vx_warp_ctl.is_barrier),
|
||||
.barrier_id (vx_warp_ctl.barrier_id),
|
||||
.num_warps (vx_warp_ctl.num_warps),
|
||||
.barrier_warp_num (vx_warp_ctl.warp_num),
|
||||
.is_barrier (warp_ctl_if.is_barrier),
|
||||
.barrier_id (warp_ctl_if.barrier_id),
|
||||
.num_warps (warp_ctl_if.num_warps),
|
||||
.barrier_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// Wspawn
|
||||
.wspawn (vx_warp_ctl.wspawn),
|
||||
.wsapwn_pc (vx_warp_ctl.wspawn_pc),
|
||||
.wspawn_new_active(vx_warp_ctl.wspawn_new_active),
|
||||
.wspawn (warp_ctl_if.wspawn),
|
||||
.wsapwn_pc (warp_ctl_if.wspawn_pc),
|
||||
.wspawn_new_active(warp_ctl_if.wspawn_new_active),
|
||||
// CTM
|
||||
.ctm (vx_warp_ctl.change_mask),
|
||||
.ctm_mask (vx_warp_ctl.thread_mask),
|
||||
.ctm_warp_num (vx_warp_ctl.warp_num),
|
||||
.ctm (warp_ctl_if.change_mask),
|
||||
.ctm_mask (warp_ctl_if.thread_mask),
|
||||
.ctm_warp_num (warp_ctl_if.warp_num),
|
||||
// WHALT
|
||||
.whalt (vx_warp_ctl.ebreak),
|
||||
.whalt_warp_num (vx_warp_ctl.warp_num),
|
||||
.whalt (warp_ctl_if.ebreak),
|
||||
.whalt_warp_num (warp_ctl_if.warp_num),
|
||||
// Wstall
|
||||
.wstall (vx_wstall.wstall),
|
||||
.wstall_warp_num (vx_wstall.warp_num),
|
||||
.wstall (wstall_if.wstall),
|
||||
.wstall_warp_num (wstall_if.warp_num),
|
||||
|
||||
// Lock/release Stuff
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
||||
// Join
|
||||
.is_join (vx_join.is_join),
|
||||
.join_warp_num (vx_join.join_warp_num),
|
||||
.is_join (join_if.is_join),
|
||||
.join_warp_num (join_if.join_warp_num),
|
||||
|
||||
// Split
|
||||
.is_split (vx_warp_ctl.is_split),
|
||||
.dont_split (vx_warp_ctl.dont_split),
|
||||
.split_new_mask (vx_warp_ctl.split_new_mask),
|
||||
.split_later_mask (vx_warp_ctl.split_later_mask),
|
||||
.split_save_pc (vx_warp_ctl.split_save_pc),
|
||||
.split_warp_num (vx_warp_ctl.warp_num),
|
||||
.is_split (warp_ctl_if.is_split),
|
||||
.dont_split (warp_ctl_if.dont_split),
|
||||
.split_new_mask (warp_ctl_if.split_new_mask),
|
||||
.split_later_mask (warp_ctl_if.split_later_mask),
|
||||
.split_save_pc (warp_ctl_if.split_save_pc),
|
||||
.split_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// JAL
|
||||
.jal (vx_jal_rsp.jal),
|
||||
.jal_dest (vx_jal_rsp.jal_dest),
|
||||
.jal_warp_num (vx_jal_rsp.jal_warp_num),
|
||||
.jal (jal_rsp_if.jal),
|
||||
.jal_dest (jal_rsp_if.jal_dest),
|
||||
.jal_warp_num (jal_rsp_if.jal_warp_num),
|
||||
|
||||
// Branch
|
||||
.branch_valid (vx_branch_rsp.valid_branch),
|
||||
.branch_dir (vx_branch_rsp.branch_dir),
|
||||
.branch_dest (vx_branch_rsp.branch_dest),
|
||||
.branch_warp_num (vx_branch_rsp.branch_warp_num),
|
||||
.branch_valid (branch_rsp_if.valid_branch),
|
||||
.branch_dir (branch_rsp_if.branch_dir),
|
||||
.branch_dest (branch_rsp_if.branch_dest),
|
||||
.branch_warp_num (branch_rsp_if.branch_warp_num),
|
||||
|
||||
// Outputs
|
||||
.thread_mask (thread_mask),
|
||||
|
|
|
@ -6,73 +6,68 @@ module VX_front_end (
|
|||
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_warp_ctl_if vx_warp_ctl,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
VX_gpu_dcache_rsp_if vx_icache_rsp,
|
||||
VX_gpu_dcache_req_if vx_icache_req,
|
||||
VX_gpu_dcache_rsp_if icache_rsp_if,
|
||||
VX_gpu_dcache_req_if icache_req_if,
|
||||
|
||||
VX_jal_response_if vx_jal_rsp,
|
||||
VX_branch_response_if vx_branch_rsp,
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
|
||||
VX_frE_to_bckE_req_if vx_bckE_req,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
|
||||
output wire fetch_ebreak
|
||||
);
|
||||
|
||||
VX_inst_meta_if fe_inst_meta_fi();
|
||||
VX_inst_meta_if fe_inst_meta_fi2();
|
||||
VX_inst_meta_if fe_inst_meta_id();
|
||||
|
||||
VX_inst_meta_if fe_inst_meta_fi();
|
||||
VX_inst_meta_if fe_inst_meta_fi2();
|
||||
VX_inst_meta_if fe_inst_meta_id();
|
||||
VX_frE_to_bckE_req_if frE_to_bckE_req_if();
|
||||
VX_inst_meta_if fd_inst_meta_de();
|
||||
|
||||
VX_frE_to_bckE_req_if vx_frE_to_bckE_req();
|
||||
VX_inst_meta_if fd_inst_meta_de();
|
||||
wire total_freeze = schedule_delay;
|
||||
wire icache_stage_delay;
|
||||
|
||||
wire total_freeze = schedule_delay;
|
||||
wire icache_stage_delay;
|
||||
wire vortex_ebreak;
|
||||
wire terminate_sim;
|
||||
|
||||
wire vortex_ebreak;
|
||||
wire terminate_sim;
|
||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
|
||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
|
||||
reg old_ebreak; // This should be eventually removed
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
old_ebreak <= 0;
|
||||
end else begin
|
||||
old_ebreak <= old_ebreak || fetch_ebreak;
|
||||
reg old_ebreak; // This should be eventually removed
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
old_ebreak <= 0;
|
||||
end else begin
|
||||
old_ebreak <= old_ebreak || fetch_ebreak;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
|
||||
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
|
||||
VX_wstall_if vx_wstall();
|
||||
VX_join_if vx_join();
|
||||
|
||||
VX_fetch vx_fetch(
|
||||
VX_fetch fetch(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.vx_wstall (vx_wstall),
|
||||
.vx_join (vx_join),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.out_ebreak (vortex_ebreak), // fetch_ebreak
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi)
|
||||
);
|
||||
|
||||
wire freeze_fi_reg = total_freeze || icache_stage_delay;
|
||||
wire freeze_fi_reg = total_freeze || icache_stage_delay;
|
||||
|
||||
|
||||
|
||||
|
||||
VX_f_d_reg vx_f_i_reg(
|
||||
VX_f_d_reg f_i_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_freeze (freeze_fi_reg),
|
||||
|
@ -80,46 +75,46 @@ VX_f_d_reg vx_f_i_reg(
|
|||
.fd_inst_meta_de(fe_inst_meta_fi2)
|
||||
);
|
||||
|
||||
VX_icache_stage vx_icache_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
.vx_icache_req (vx_icache_req)
|
||||
VX_icache_stage icache_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if)
|
||||
);
|
||||
|
||||
|
||||
VX_i_d_reg vx_i_d_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_freeze (total_freeze),
|
||||
.fe_inst_meta_fd(fe_inst_meta_id),
|
||||
.fd_inst_meta_de(fd_inst_meta_de)
|
||||
VX_i_d_reg i_d_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_freeze (total_freeze),
|
||||
.fe_inst_meta_fd (fe_inst_meta_id),
|
||||
.fd_inst_meta_de (fd_inst_meta_de)
|
||||
);
|
||||
|
||||
|
||||
VX_decode vx_decode(
|
||||
.fd_inst_meta_de (fd_inst_meta_de),
|
||||
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
|
||||
.vx_wstall (vx_wstall),
|
||||
.vx_join (vx_join),
|
||||
.terminate_sim (terminate_sim)
|
||||
VX_decode decode(
|
||||
.fd_inst_meta_de (fd_inst_meta_de),
|
||||
.frE_to_bckE_req_if (frE_to_bckE_req_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.terminate_sim (terminate_sim)
|
||||
);
|
||||
|
||||
wire no_br_stall = 0;
|
||||
wire no_br_stall = 0;
|
||||
|
||||
VX_d_e_reg vx_d_e_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_branch_stall(no_br_stall),
|
||||
.in_freeze (total_freeze),
|
||||
.vx_frE_to_bckE_req(vx_frE_to_bckE_req),
|
||||
.vx_bckE_req (vx_bckE_req)
|
||||
VX_d_e_reg d_e_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_branch_stall (no_br_stall),
|
||||
.in_freeze (total_freeze),
|
||||
.frE_to_bckE_req_if (frE_to_bckE_req_if),
|
||||
.bckE_req_if (bckE_req_if)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -2,57 +2,57 @@
|
|||
|
||||
module VX_gpgpu_inst (
|
||||
// Input
|
||||
VX_gpu_inst_req_if vx_gpu_inst_req,
|
||||
VX_gpu_inst_req_if gpu_inst_req_if,
|
||||
|
||||
// Output
|
||||
VX_warp_ctl_if vx_warp_ctl
|
||||
VX_warp_ctl_if warp_ctl_if
|
||||
);
|
||||
wire[`NUM_THREADS-1:0] curr_valids = vx_gpu_inst_req.valid;
|
||||
wire is_split = (vx_gpu_inst_req.is_split);
|
||||
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
|
||||
wire is_split = (gpu_inst_req_if.is_split);
|
||||
|
||||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < vx_gpu_inst_req.a_reg_data[0];
|
||||
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
|
||||
|
||||
genvar curr_t;
|
||||
generate
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < vx_gpu_inst_req.a_reg_data[0];
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire valid_inst = (|curr_valids);
|
||||
|
||||
assign vx_warp_ctl.warp_num = vx_gpu_inst_req.warp_num;
|
||||
assign vx_warp_ctl.change_mask = (vx_gpu_inst_req.is_tmc) && valid_inst;
|
||||
assign vx_warp_ctl.thread_mask = vx_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
|
||||
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
|
||||
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
|
||||
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
// assign vx_warp_ctl.ebreak = (vx_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
|
||||
assign vx_warp_ctl.ebreak = vx_warp_ctl.change_mask && (vx_warp_ctl.thread_mask == 0);
|
||||
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
|
||||
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
||||
|
||||
wire wspawn = vx_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = vx_gpu_inst_req.rd2;
|
||||
wire all_active = `NUM_WARPS < vx_gpu_inst_req.a_reg_data[0];
|
||||
wire wspawn = gpu_inst_req_if.is_wspawn;
|
||||
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
|
||||
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
genvar curr_w;
|
||||
generate
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < vx_gpu_inst_req.a_reg_data[0];
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign vx_warp_ctl.is_barrier = vx_gpu_inst_req.is_barrier && valid_inst;
|
||||
assign vx_warp_ctl.barrier_id = vx_gpu_inst_req.a_reg_data[0];
|
||||
assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
|
||||
assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire[31:0] num_warps_m1 = vx_gpu_inst_req.rd2 - 1;
|
||||
wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
|
||||
`DEBUG_END
|
||||
|
||||
assign vx_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
|
||||
assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
|
||||
|
||||
assign vx_warp_ctl.wspawn = wspawn;
|
||||
assign vx_warp_ctl.wspawn_pc = wspawn_pc;
|
||||
assign vx_warp_ctl.wspawn_new_active = wspawn_new_active;
|
||||
assign warp_ctl_if.wspawn = wspawn;
|
||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
||||
assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
|
||||
|
||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
|
@ -61,7 +61,7 @@ module VX_gpgpu_inst (
|
|||
genvar curr_s_t;
|
||||
generate
|
||||
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
|
||||
wire curr_bool = (vx_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
|
||||
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
|
||||
|
||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
||||
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
|
||||
|
@ -79,15 +79,15 @@ module VX_gpgpu_inst (
|
|||
|
||||
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
|
||||
|
||||
assign vx_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign vx_warp_ctl.dont_split = vx_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
|
||||
assign vx_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign vx_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign vx_warp_ctl.split_save_pc = vx_gpu_inst_req.pc_next;
|
||||
assign vx_warp_ctl.split_warp_num = vx_gpu_inst_req.warp_num;
|
||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1);
|
||||
assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
|
||||
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
||||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
||||
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next;
|
||||
assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num;
|
||||
|
||||
// vx_gpu_inst_req.is_wspawn
|
||||
// vx_gpu_inst_req.is_split
|
||||
// vx_gpu_inst_req.is_barrier
|
||||
// gpu_inst_req_if.is_wspawn
|
||||
// gpu_inst_req_if.is_split
|
||||
// gpu_inst_req_if.is_barrier
|
||||
|
||||
endmodule
|
|
@ -4,8 +4,8 @@ module VX_gpr (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
input wire valid_write_request,
|
||||
VX_gpr_read_if vx_gpr_read,
|
||||
VX_wb_if vx_writeback_if,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_a_reg_data,
|
||||
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_b_reg_data
|
||||
|
@ -13,36 +13,36 @@ module VX_gpr (
|
|||
wire write_enable;
|
||||
|
||||
`ifndef ASIC
|
||||
assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0)) && (vx_writeback_if.rd != 0);
|
||||
assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0);
|
||||
|
||||
byte_enabled_simple_dual_port_ram first_ram(
|
||||
.we (write_enable),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.waddr (vx_writeback_if.rd),
|
||||
.raddr1(vx_gpr_read.rs1),
|
||||
.raddr2(vx_gpr_read.rs2),
|
||||
.be (vx_writeback_if.wb_valid),
|
||||
.wdata (vx_writeback_if.write_data),
|
||||
.waddr (writeback_if.rd),
|
||||
.raddr1(gpr_read_if.rs1),
|
||||
.raddr2(gpr_read_if.rs2),
|
||||
.be (writeback_if.wb_valid),
|
||||
.wdata (writeback_if.write_data),
|
||||
.q1 (out_a_reg_data),
|
||||
.q2 (out_b_reg_data)
|
||||
);
|
||||
`else
|
||||
assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0));
|
||||
wire going_to_write = write_enable & (|vx_writeback_if.wb_valid);
|
||||
assign write_enable = valid_write_request && ((writeback_if.wb != 0));
|
||||
wire going_to_write = write_enable & (|writeback_if.wb_valid);
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & vx_writeback_if.wb_valid[curr_t];
|
||||
wire local_write = write_enable & writeback_if.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
|
||||
end
|
||||
|
||||
// wire cenb = !going_to_write;
|
||||
wire cenb = 0;
|
||||
|
||||
// wire cena_1 = (vx_gpr_read.rs1 == 0);
|
||||
// wire cena_2 = (vx_gpr_read.rs2 == 0);
|
||||
// wire cena_1 = (gpr_read_if.rs1 == 0);
|
||||
// wire cena_2 = (gpr_read_if.rs2 == 0);
|
||||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
|
@ -65,7 +65,7 @@ module VX_gpr (
|
|||
assign out_b_reg_data = temp_b;
|
||||
`endif
|
||||
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (vx_writeback_if.rd != 0) ? vx_writeback_if.write_data : 0;
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
|
||||
|
||||
genvar curr_base_thread;
|
||||
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
|
||||
|
@ -82,11 +82,11 @@ module VX_gpr (
|
|||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(vx_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
|
@ -121,11 +121,11 @@ module VX_gpr (
|
|||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(vx_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
|
|
|
@ -12,72 +12,72 @@ module VX_gpr_stage (
|
|||
|
||||
// inputs
|
||||
// Instruction Information
|
||||
VX_frE_to_bckE_req_if vx_bckE_req,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
|
||||
// WriteBack inputs
|
||||
VX_wb_if vx_writeback_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if vx_exec_unit_req,
|
||||
VX_lsu_req_if vx_lsu_req,
|
||||
VX_gpu_inst_req_if vx_gpu_inst_req,
|
||||
VX_csr_req_if vx_csr_req
|
||||
VX_exec_unit_req_if exec_unit_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_gpu_inst_req_if gpu_inst_req_if,
|
||||
VX_csr_req_if csr_req_if
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
wire[31:0] curr_PC = vx_bckE_req.curr_PC;
|
||||
wire[2:0] branchType = vx_bckE_req.branch_type;
|
||||
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire jalQual = vx_bckE_req.jalQual;
|
||||
wire[31:0] curr_PC = bckE_req_if.curr_PC;
|
||||
wire[2:0] branchType = bckE_req_if.branch_type;
|
||||
wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ);
|
||||
wire jalQual = bckE_req_if.jalQual;
|
||||
`DEBUG_END
|
||||
|
||||
VX_gpr_read_if vx_gpr_read();
|
||||
assign vx_gpr_read.rs1 = vx_bckE_req.rs1;
|
||||
assign vx_gpr_read.rs2 = vx_bckE_req.rs2;
|
||||
assign vx_gpr_read.warp_num = vx_bckE_req.warp_num;
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.rs1 = bckE_req_if.rs1;
|
||||
assign gpr_read_if.rs2 = bckE_req_if.rs2;
|
||||
assign gpr_read_if.warp_num = bckE_req_if.warp_num;
|
||||
|
||||
`ifndef ASIC
|
||||
VX_gpr_jal_if vx_gpr_jal();
|
||||
assign vx_gpr_jal.is_jal = vx_bckE_req.jalQual;
|
||||
assign vx_gpr_jal.curr_PC = vx_bckE_req.curr_PC;
|
||||
VX_gpr_jal_if gpr_jal_if();
|
||||
assign gpr_jal_if.is_jal = bckE_req_if.jalQual;
|
||||
assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC;
|
||||
`else
|
||||
VX_gpr_jal_if vx_gpr_jal();
|
||||
assign vx_gpr_jal.is_jal = vx_exec_unit_req.jalQual;
|
||||
assign vx_gpr_jal.curr_PC = vx_exec_unit_req.curr_PC;
|
||||
VX_gpr_jal_if gpr_jal_if();
|
||||
assign gpr_jal_if.is_jal = exec_unit_req_if.jalQual;
|
||||
assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC;
|
||||
`endif
|
||||
|
||||
VX_gpr_data_if vx_gpr_datf();
|
||||
VX_gpr_data_if gpr_datf_if();
|
||||
|
||||
VX_gpr_wrapper vx_grp_wrapper (
|
||||
VX_gpr_wrapper grp_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_writeback_if(vx_writeback_if),
|
||||
.vx_gpr_read (vx_gpr_read),
|
||||
.vx_gpr_jal (vx_gpr_jal),
|
||||
.writeback_if(writeback_if),
|
||||
.gpr_read_if (gpr_read_if),
|
||||
.gpr_jal_if (gpr_jal_if),
|
||||
|
||||
.out_a_reg_data (vx_gpr_datf.a_reg_data),
|
||||
.out_b_reg_data (vx_gpr_datf.b_reg_data)
|
||||
.out_a_reg_data (gpr_datf_if.a_reg_data),
|
||||
.out_b_reg_data (gpr_datf_if.b_reg_data)
|
||||
);
|
||||
|
||||
// assign vx_bckE_req.is_csr = is_csr;
|
||||
// assign vx_bckE_req_out.csr_mask = (vx_bckE_req.sr_immed == 1'b1) ? {27'h0, vx_bckE_req.rs1} : vx_gpr_data.a_reg_data[0];
|
||||
// assign bckE_req_if.is_csr = is_csr;
|
||||
// assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if vx_exec_unit_req_temp();
|
||||
VX_lsu_req_if vx_lsu_req_temp();
|
||||
VX_gpu_inst_req_if vx_gpu_inst_req_temp();
|
||||
VX_csr_req_if vx_csr_req_temp();
|
||||
VX_exec_unit_req_if exec_unit_req_temp_if();
|
||||
VX_lsu_req_if lsu_req_temp_if();
|
||||
VX_gpu_inst_req_if gpu_inst_req_temp_if();
|
||||
VX_csr_req_if csr_req_temp_if();
|
||||
|
||||
VX_inst_multiplex vx_inst_mult(
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_gpr_data (vx_gpr_datf),
|
||||
.vx_exec_unit_req(vx_exec_unit_req_temp),
|
||||
.vx_lsu_req (vx_lsu_req_temp),
|
||||
.vx_gpu_inst_req (vx_gpu_inst_req_temp),
|
||||
.vx_csr_req (vx_csr_req_temp)
|
||||
VX_inst_multiplex inst_mult(
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.gpr_data_if (gpr_datf_if),
|
||||
.exec_unit_req_if(exec_unit_req_temp_if),
|
||||
.lsu_req_if (lsu_req_temp_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_temp_if),
|
||||
.csr_req_if (csr_req_temp_if)
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
wire is_lsu = (|vx_lsu_req_temp.valid);
|
||||
wire is_lsu = (|lsu_req_temp_if.valid);
|
||||
`DEBUG_END
|
||||
wire stall_rest = 0;
|
||||
wire flush_rest = schedule_delay;
|
||||
|
@ -88,7 +88,7 @@ module VX_gpr_stage (
|
|||
wire stall_exec = exec_delay;
|
||||
wire flush_exec = schedule_delay && !stall_exec;
|
||||
|
||||
wire stall_csr = stall_gpr_csr && vx_bckE_req.is_csr && (|vx_bckE_req.valid);
|
||||
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid);
|
||||
|
||||
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
|
||||
|
||||
|
@ -125,11 +125,11 @@ module VX_gpr_stage (
|
|||
.out ({temp_store_data, temp_base_address})
|
||||
);
|
||||
|
||||
assign real_store_data = vx_lsu_req_temp.store_data;
|
||||
assign real_base_address = vx_lsu_req_temp.base_address;
|
||||
assign real_store_data = lsu_req_temp_if.store_data;
|
||||
assign real_base_address = lsu_req_temp_if.base_address;
|
||||
|
||||
assign vx_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
|
||||
assign vx_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
|
||||
assign lsu_req_if.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
|
||||
assign lsu_req_if.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
|
@ -138,8 +138,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
.flush(flush_lsu),
|
||||
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
|
||||
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc ,vx_lsu_req.warp_num , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
|
||||
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
|
||||
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -149,12 +149,12 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
|
||||
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
assign vx_exec_unit_req.a_reg_data = real_base_address;
|
||||
assign vx_exec_unit_req.b_reg_data = real_store_data;
|
||||
assign exec_unit_req_if.a_reg_data = real_base_address;
|
||||
assign exec_unit_req_if.b_reg_data = real_store_data;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
|
@ -163,12 +163,12 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next}),
|
||||
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next })
|
||||
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}),
|
||||
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next })
|
||||
);
|
||||
|
||||
assign vx_gpu_inst_req.a_reg_data = real_base_address;
|
||||
assign vx_gpu_inst_req.rd2 = real_store_data;
|
||||
assign gpu_inst_req_if.a_reg_data = real_base_address;
|
||||
assign gpu_inst_req_if.rd2 = real_store_data;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
|
||||
|
@ -177,8 +177,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
|
||||
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
|
||||
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
|
||||
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask })
|
||||
);
|
||||
|
||||
`else
|
||||
|
@ -191,8 +191,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
.flush(flush_lsu),
|
||||
.in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.store_data, vx_lsu_req_temp.base_address, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}),
|
||||
.out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc , vx_lsu_req.warp_num , vx_lsu_req.store_data , vx_lsu_req.base_address , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb })
|
||||
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
|
||||
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -202,8 +202,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.a_reg_data, vx_exec_unit_req_temp.b_reg_data, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}),
|
||||
.out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.a_reg_data , vx_exec_unit_req.b_reg_data , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask })
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -213,8 +213,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next, vx_gpu_inst_req_temp.a_reg_data, vx_gpu_inst_req_temp.rd2}),
|
||||
.out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next , vx_gpu_inst_req.a_reg_data , vx_gpu_inst_req.rd2 })
|
||||
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
|
||||
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
@ -224,8 +224,8 @@ module VX_gpr_stage (
|
|||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}),
|
||||
.out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask })
|
||||
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
|
||||
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask })
|
||||
);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
module VX_gpr_wrapper (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_gpr_read_if vx_gpr_read,
|
||||
VX_wb_if vx_writeback_if,
|
||||
VX_gpr_jal_if vx_gpr_jal,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_gpr_jal_if gpr_jal_if,
|
||||
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
|
||||
|
@ -19,13 +19,13 @@ module VX_gpr_wrapper (
|
|||
genvar index;
|
||||
generate
|
||||
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
|
||||
assign jal_data[index] = vx_gpr_jal.curr_PC;
|
||||
assign jal_data[index] = gpr_jal_if.curr_PC;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
`ifndef ASIC
|
||||
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[vx_gpr_read.warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[vx_gpr_read.warp_num]);
|
||||
assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]);
|
||||
`else
|
||||
|
||||
wire zer = 0;
|
||||
|
@ -38,31 +38,29 @@ module VX_gpr_wrapper (
|
|||
.reset(reset),
|
||||
.stall(zer),
|
||||
.flush(zer),
|
||||
.in (vx_gpr_read.warp_num),
|
||||
.in (gpr_read_if.warp_num),
|
||||
.out (old_warp_num)
|
||||
);
|
||||
|
||||
assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
|
||||
assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[old_warp_num]);
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
genvar warp_index;
|
||||
generate
|
||||
|
||||
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
|
||||
|
||||
wire valid_write_request = warp_index == vx_writeback_if.wb_warp_num;
|
||||
VX_gpr vx_gpr(
|
||||
wire valid_write_request = warp_index == writeback_if.wb_warp_num;
|
||||
VX_gpr gpr(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_write_request(valid_write_request),
|
||||
.vx_gpr_read (vx_gpr_read),
|
||||
.vx_writeback_if (vx_writeback_if),
|
||||
.gpr_read_if (gpr_read_if),
|
||||
.writeback_if (writeback_if),
|
||||
.out_a_reg_data (temp_a_reg_data[warp_index]),
|
||||
.out_b_reg_data (temp_b_reg_data[warp_index])
|
||||
);
|
||||
|
||||
);
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_icache_stage (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_inst_meta_if fe_inst_meta_id,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_inst_meta_if fe_inst_meta_id,
|
||||
|
||||
VX_gpu_dcache_rsp_if vx_icache_rsp,
|
||||
VX_gpu_dcache_req_if vx_icache_req
|
||||
VX_gpu_dcache_rsp_if icache_rsp_if,
|
||||
VX_gpu_dcache_req_if icache_req_if
|
||||
);
|
||||
|
||||
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
|
||||
|
@ -19,30 +19,30 @@ module VX_icache_stage (
|
|||
wire valid_inst = (|fe_inst_meta_fi.valid);
|
||||
|
||||
// Icache Request
|
||||
assign vx_icache_req.core_req_valid = valid_inst && !total_freeze;
|
||||
assign vx_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign vx_icache_req.core_req_writedata = 32'b0;
|
||||
assign vx_icache_req.core_req_mem_read = `LW_MEM_READ;
|
||||
assign vx_icache_req.core_req_mem_write = `NO_MEM_WRITE;
|
||||
assign vx_icache_req.core_req_rd = 5'b0;
|
||||
assign vx_icache_req.core_req_wb = {1{2'b1}};
|
||||
assign vx_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num;
|
||||
assign vx_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc;
|
||||
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
|
||||
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
|
||||
assign icache_req_if.core_req_writedata = 32'b0;
|
||||
assign icache_req_if.core_req_mem_read = `LW_MEM_READ;
|
||||
assign icache_req_if.core_req_mem_write = `NO_MEM_WRITE;
|
||||
assign icache_req_if.core_req_rd = 5'b0;
|
||||
assign icache_req_if.core_req_wb = {1{2'b1}};
|
||||
assign icache_req_if.core_req_warp_num = fe_inst_meta_fi.warp_num;
|
||||
assign icache_req_if.core_req_pc = fe_inst_meta_fi.inst_pc;
|
||||
|
||||
assign fe_inst_meta_id.instruction = vx_icache_rsp.core_wb_readdata[0][31:0];
|
||||
assign fe_inst_meta_id.inst_pc = vx_icache_rsp.core_wb_pc[0];
|
||||
assign fe_inst_meta_id.warp_num = vx_icache_rsp.core_wb_warp_num;
|
||||
assign fe_inst_meta_id.instruction = icache_rsp_if.core_wb_readdata[0][31:0];
|
||||
assign fe_inst_meta_id.inst_pc = icache_rsp_if.core_wb_pc[0];
|
||||
assign fe_inst_meta_id.warp_num = icache_rsp_if.core_wb_warp_num;
|
||||
|
||||
assign fe_inst_meta_id.valid = vx_icache_rsp.core_wb_valid ? threads_active[vx_icache_rsp.core_wb_warp_num] : 0;
|
||||
assign fe_inst_meta_id.valid = icache_rsp_if.core_wb_valid ? threads_active[icache_rsp_if.core_wb_warp_num] : 0;
|
||||
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
||||
// Cache can't accept request
|
||||
assign icache_stage_delay = vx_icache_rsp.delay_req;
|
||||
assign icache_stage_delay = icache_rsp_if.delay_req;
|
||||
|
||||
// Core can't accept response
|
||||
assign vx_icache_req.core_no_wb_slot = total_freeze;
|
||||
assign icache_req_if.core_no_wb_slot = total_freeze;
|
||||
|
||||
integer curr_w;
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -2,23 +2,23 @@
|
|||
|
||||
module VX_inst_multiplex (
|
||||
// Inputs
|
||||
VX_frE_to_bckE_req_if vx_bckE_req,
|
||||
VX_gpr_data_if vx_gpr_data,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
VX_gpr_data_if gpr_data_if,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if vx_exec_unit_req,
|
||||
VX_lsu_req_if vx_lsu_req,
|
||||
VX_gpu_inst_req_if vx_gpu_inst_req,
|
||||
VX_csr_req_if vx_csr_req
|
||||
VX_exec_unit_req_if exec_unit_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_gpu_inst_req_if gpu_inst_req_if,
|
||||
VX_csr_req_if csr_req_if
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] is_mem_mask;
|
||||
wire[`NUM_THREADS-1:0] is_gpu_mask;
|
||||
wire[`NUM_THREADS-1:0] is_csr_mask;
|
||||
|
||||
wire is_mem = (vx_bckE_req.mem_write != `NO_MEM_WRITE) || (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
|
||||
wire is_csr = vx_bckE_req.is_csr;
|
||||
wire is_mem = (bckE_req_if.mem_write != `NO_MEM_WRITE) || (bckE_req_if.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
|
||||
wire is_csr = bckE_req_if.is_csr;
|
||||
// wire is_gpu = 0;
|
||||
|
||||
genvar currT;
|
||||
|
@ -31,64 +31,64 @@ module VX_inst_multiplex (
|
|||
endgenerate
|
||||
|
||||
// LSU Unit
|
||||
assign vx_lsu_req.valid = vx_bckE_req.valid & is_mem_mask;
|
||||
assign vx_lsu_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_lsu_req.base_address = vx_gpr_data.a_reg_data;
|
||||
assign vx_lsu_req.store_data = vx_gpr_data.b_reg_data;
|
||||
assign lsu_req_if.valid = bckE_req_if.valid & is_mem_mask;
|
||||
assign lsu_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign lsu_req_if.base_address = gpr_data_if.a_reg_data;
|
||||
assign lsu_req_if.store_data = gpr_data_if.b_reg_data;
|
||||
|
||||
assign vx_lsu_req.offset = vx_bckE_req.itype_immed;
|
||||
assign lsu_req_if.offset = bckE_req_if.itype_immed;
|
||||
|
||||
assign vx_lsu_req.mem_read = vx_bckE_req.mem_read;
|
||||
assign vx_lsu_req.mem_write = vx_bckE_req.mem_write;
|
||||
assign vx_lsu_req.rd = vx_bckE_req.rd;
|
||||
assign vx_lsu_req.wb = vx_bckE_req.wb;
|
||||
assign vx_lsu_req.lsu_pc = vx_bckE_req.curr_PC;
|
||||
assign lsu_req_if.mem_read = bckE_req_if.mem_read;
|
||||
assign lsu_req_if.mem_write = bckE_req_if.mem_write;
|
||||
assign lsu_req_if.rd = bckE_req_if.rd;
|
||||
assign lsu_req_if.wb = bckE_req_if.wb;
|
||||
assign lsu_req_if.lsu_pc = bckE_req_if.curr_PC;
|
||||
|
||||
|
||||
// Execute Unit
|
||||
assign vx_exec_unit_req.valid = vx_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign vx_exec_unit_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_exec_unit_req.curr_PC = vx_bckE_req.curr_PC;
|
||||
assign vx_exec_unit_req.PC_next = vx_bckE_req.PC_next;
|
||||
assign vx_exec_unit_req.rd = vx_bckE_req.rd;
|
||||
assign vx_exec_unit_req.wb = vx_bckE_req.wb;
|
||||
assign vx_exec_unit_req.a_reg_data = vx_gpr_data.a_reg_data;
|
||||
assign vx_exec_unit_req.b_reg_data = vx_gpr_data.b_reg_data;
|
||||
assign vx_exec_unit_req.alu_op = vx_bckE_req.alu_op;
|
||||
assign vx_exec_unit_req.rs1 = vx_bckE_req.rs1;
|
||||
assign vx_exec_unit_req.rs2 = vx_bckE_req.rs2;
|
||||
assign vx_exec_unit_req.rs2_src = vx_bckE_req.rs2_src;
|
||||
assign vx_exec_unit_req.itype_immed = vx_bckE_req.itype_immed;
|
||||
assign vx_exec_unit_req.upper_immed = vx_bckE_req.upper_immed;
|
||||
assign vx_exec_unit_req.branch_type = vx_bckE_req.branch_type;
|
||||
assign vx_exec_unit_req.jalQual = vx_bckE_req.jalQual;
|
||||
assign vx_exec_unit_req.jal = vx_bckE_req.jal;
|
||||
assign vx_exec_unit_req.jal_offset = vx_bckE_req.jal_offset;
|
||||
assign vx_exec_unit_req.ebreak = vx_bckE_req.ebreak;
|
||||
assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign exec_unit_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC;
|
||||
assign exec_unit_req_if.PC_next = bckE_req_if.PC_next;
|
||||
assign exec_unit_req_if.rd = bckE_req_if.rd;
|
||||
assign exec_unit_req_if.wb = bckE_req_if.wb;
|
||||
assign exec_unit_req_if.a_reg_data = gpr_data_if.a_reg_data;
|
||||
assign exec_unit_req_if.b_reg_data = gpr_data_if.b_reg_data;
|
||||
assign exec_unit_req_if.alu_op = bckE_req_if.alu_op;
|
||||
assign exec_unit_req_if.rs1 = bckE_req_if.rs1;
|
||||
assign exec_unit_req_if.rs2 = bckE_req_if.rs2;
|
||||
assign exec_unit_req_if.rs2_src = bckE_req_if.rs2_src;
|
||||
assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
|
||||
assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
|
||||
assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
|
||||
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
|
||||
assign exec_unit_req_if.jal = bckE_req_if.jal;
|
||||
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
|
||||
assign exec_unit_req_if.ebreak = bckE_req_if.ebreak;
|
||||
|
||||
|
||||
// GPR Req
|
||||
assign vx_gpu_inst_req.valid = vx_bckE_req.valid & is_gpu_mask;
|
||||
assign vx_gpu_inst_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_gpu_inst_req.is_wspawn = vx_bckE_req.is_wspawn;
|
||||
assign vx_gpu_inst_req.is_tmc = vx_bckE_req.is_tmc;
|
||||
assign vx_gpu_inst_req.is_split = vx_bckE_req.is_split;
|
||||
assign vx_gpu_inst_req.is_barrier = vx_bckE_req.is_barrier;
|
||||
assign vx_gpu_inst_req.a_reg_data = vx_gpr_data.a_reg_data;
|
||||
assign vx_gpu_inst_req.rd2 = vx_gpr_data.b_reg_data[0];
|
||||
assign vx_gpu_inst_req.pc_next = vx_bckE_req.PC_next;
|
||||
assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
|
||||
assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign gpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn;
|
||||
assign gpu_inst_req_if.is_tmc = bckE_req_if.is_tmc;
|
||||
assign gpu_inst_req_if.is_split = bckE_req_if.is_split;
|
||||
assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
|
||||
assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
|
||||
assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0];
|
||||
assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next;
|
||||
|
||||
|
||||
// CSR Req
|
||||
assign vx_csr_req.valid = vx_bckE_req.valid & is_csr_mask;
|
||||
assign vx_csr_req.warp_num = vx_bckE_req.warp_num;
|
||||
assign vx_csr_req.rd = vx_bckE_req.rd;
|
||||
assign vx_csr_req.wb = vx_bckE_req.wb;
|
||||
assign vx_csr_req.alu_op = vx_bckE_req.alu_op;
|
||||
assign vx_csr_req.is_csr = vx_bckE_req.is_csr;
|
||||
assign vx_csr_req.csr_address = vx_bckE_req.csr_address;
|
||||
assign vx_csr_req.csr_immed = vx_bckE_req.csr_immed;
|
||||
assign vx_csr_req.csr_mask = vx_bckE_req.csr_mask;
|
||||
assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask;
|
||||
assign csr_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign csr_req_if.rd = bckE_req_if.rd;
|
||||
assign csr_req_if.wb = bckE_req_if.wb;
|
||||
assign csr_req_if.alu_op = bckE_req_if.alu_op;
|
||||
assign csr_req_if.is_csr = bckE_req_if.is_csr;
|
||||
assign csr_req_if.csr_address = bckE_req_if.csr_address;
|
||||
assign csr_req_if.csr_immed = bckE_req_if.csr_immed;
|
||||
assign csr_req_if.csr_mask = bckE_req_if.csr_mask;
|
||||
|
||||
endmodule
|
||||
|
||||
|
|
|
@ -1,23 +1,23 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_mem,
|
||||
VX_lsu_req_if vx_lsu_req,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_mem,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
|
||||
// Write back to GPR
|
||||
VX_inst_mem_wb_if vx_mem_wb,
|
||||
VX_inst_mem_wb_if mem_wb_if,
|
||||
|
||||
VX_gpu_dcache_rsp_if vx_dcache_rsp,
|
||||
VX_gpu_dcache_req_if vx_dcache_req,
|
||||
output wire out_delay
|
||||
VX_gpu_dcache_rsp_if dcache_rsp_if,
|
||||
VX_gpu_dcache_req_if dcache_req_if,
|
||||
output wire out_delay
|
||||
);
|
||||
// Generate Addresses
|
||||
wire[`NUM_THREADS-1:0][31:0] address;
|
||||
VX_lsu_addr_gen VX_lsu_addr_gen (
|
||||
.base_address (vx_lsu_req.base_address),
|
||||
.offset (vx_lsu_req.offset),
|
||||
.base_address (lsu_req_if.base_address),
|
||||
.offset (lsu_req_if.offset),
|
||||
.address (address)
|
||||
);
|
||||
|
||||
|
@ -40,33 +40,33 @@ module VX_lsu (
|
|||
.reset(reset),
|
||||
.stall(out_delay),
|
||||
.flush(zero),
|
||||
.in ({address , vx_lsu_req.store_data, vx_lsu_req.valid, vx_lsu_req.mem_read, vx_lsu_req.mem_write, vx_lsu_req.rd, vx_lsu_req.warp_num, vx_lsu_req.wb, vx_lsu_req.lsu_pc}),
|
||||
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
|
||||
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
|
||||
);
|
||||
|
||||
// Core Request
|
||||
assign vx_dcache_req.core_req_valid = use_valid;
|
||||
assign vx_dcache_req.core_req_addr = use_address;
|
||||
assign vx_dcache_req.core_req_writedata = use_store_data;
|
||||
assign vx_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
|
||||
assign vx_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
|
||||
assign vx_dcache_req.core_req_rd = use_rd;
|
||||
assign vx_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
|
||||
assign vx_dcache_req.core_req_warp_num = use_warp_num;
|
||||
assign vx_dcache_req.core_req_pc = use_pc;
|
||||
assign dcache_req_if.core_req_valid = use_valid;
|
||||
assign dcache_req_if.core_req_addr = use_address;
|
||||
assign dcache_req_if.core_req_writedata = use_store_data;
|
||||
assign dcache_req_if.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
|
||||
assign dcache_req_if.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
|
||||
assign dcache_req_if.core_req_rd = use_rd;
|
||||
assign dcache_req_if.core_req_wb = {`NUM_THREADS{use_wb}};
|
||||
assign dcache_req_if.core_req_warp_num = use_warp_num;
|
||||
assign dcache_req_if.core_req_pc = use_pc;
|
||||
|
||||
// Core can't accept response
|
||||
assign vx_dcache_req.core_no_wb_slot = no_slot_mem;
|
||||
assign dcache_req_if.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
// Cache can't accept request
|
||||
assign out_delay = vx_dcache_rsp.delay_req;
|
||||
assign out_delay = dcache_rsp_if.delay_req;
|
||||
|
||||
// Core Response
|
||||
assign vx_mem_wb.rd = vx_dcache_rsp.core_wb_req_rd;
|
||||
assign vx_mem_wb.wb = vx_dcache_rsp.core_wb_req_wb;
|
||||
assign vx_mem_wb.wb_valid = vx_dcache_rsp.core_wb_valid;
|
||||
assign vx_mem_wb.wb_warp_num = vx_dcache_rsp.core_wb_warp_num;
|
||||
assign vx_mem_wb.loaded_data = vx_dcache_rsp.core_wb_readdata;
|
||||
assign mem_wb_if.rd = dcache_rsp_if.core_wb_req_rd;
|
||||
assign mem_wb_if.wb = dcache_rsp_if.core_wb_req_wb;
|
||||
assign mem_wb_if.wb_valid = dcache_rsp_if.core_wb_valid;
|
||||
assign mem_wb_if.wb_warp_num = dcache_rsp_if.core_wb_warp_num;
|
||||
assign mem_wb_if.loaded_data = dcache_rsp_if.core_wb_readdata;
|
||||
|
||||
wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
|
||||
|
||||
|
@ -75,12 +75,12 @@ module VX_lsu (
|
|||
`DEBUG_END
|
||||
|
||||
VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
|
||||
.valids(vx_dcache_rsp.core_wb_valid),
|
||||
.valids(dcache_rsp_if.core_wb_valid),
|
||||
.index (use_pc_index),
|
||||
.found (found)
|
||||
);
|
||||
|
||||
assign vx_mem_wb.mem_wb_pc = vx_dcache_rsp.core_wb_pc[use_pc_index];
|
||||
assign mem_wb_if.mem_wb_pc = dcache_rsp_if.core_wb_pc[use_pc_index];
|
||||
|
||||
endmodule // Memory
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scheduler (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire gpr_stage_delay,
|
||||
VX_frE_to_bckE_req_if vx_bckE_req,
|
||||
VX_wb_if vx_writeback_if,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire gpr_stage_delay,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
|
@ -18,31 +18,31 @@ module VX_scheduler (
|
|||
|
||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_wb = (vx_writeback_if.wb != 0) && (|vx_writeback_if.wb_valid) && (vx_writeback_if.rd != 0);
|
||||
wire wb_inc = (vx_bckE_req.wb != 0) && (vx_bckE_req.rd != 0);
|
||||
wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.wb_valid) && (writeback_if.rd != 0);
|
||||
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
||||
|
||||
wire rs1_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs1] != 0;
|
||||
wire rs2_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs2] != 0;
|
||||
wire rd_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd ] != 0;
|
||||
wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0;
|
||||
wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0;
|
||||
wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0;
|
||||
|
||||
wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ);
|
||||
|
||||
// classify our next instruction.
|
||||
wire is_mem = is_store || is_load;
|
||||
wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split);
|
||||
wire is_csr = vx_bckE_req.is_csr;
|
||||
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
|
||||
wire is_csr = bckE_req_if.is_csr;
|
||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||
|
||||
wire using_rs2 = (vx_bckE_req.rs2_src == `RS2_REG) || is_store || vx_bckE_req.is_barrier || vx_bckE_req.is_wspawn;
|
||||
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
|
||||
|
||||
wire rs1_rename_qual = ((rs1_rename) && (vx_bckE_req.rs1 != 0));
|
||||
wire rs2_rename_qual = ((rs2_rename) && (vx_bckE_req.rs2 != 0 && using_rs2));
|
||||
wire rd_rename_qual = ((rd_rename ) && (vx_bckE_req.rd != 0));
|
||||
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
|
||||
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
|
||||
wire rd_rename_qual = ((rd_rename ) && (bckE_req_if.rd != 0));
|
||||
|
||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||
|
||||
assign schedule_delay = ((rename_valid) && (|vx_bckE_req.valid))
|
||||
assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid))
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec);
|
||||
|
@ -59,15 +59,15 @@ module VX_scheduler (
|
|||
end
|
||||
end else begin
|
||||
if (valid_wb) begin
|
||||
rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] <= rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & (~vx_writeback_if.wb_valid);
|
||||
rename_table[writeback_if.wb_warp_num][writeback_if.rd] <= rename_table[writeback_if.wb_warp_num][writeback_if.rd] & (~writeback_if.wb_valid);
|
||||
end
|
||||
|
||||
if (!schedule_delay && wb_inc) begin
|
||||
rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd] <= vx_bckE_req.valid;
|
||||
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
|
||||
end
|
||||
|
||||
if (valid_wb
|
||||
&& (0 == (rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & ~vx_writeback_if.wb_valid))) begin
|
||||
&& (0 == (rename_table[writeback_if.wb_warp_num][writeback_if.rd] & ~writeback_if.wb_valid))) begin
|
||||
count_valid <= count_valid - 1;
|
||||
end
|
||||
|
||||
|
|
|
@ -1,67 +1,66 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_writeback (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Mem WB info
|
||||
VX_inst_mem_wb_if vx_mem_wb,
|
||||
VX_inst_mem_wb_if mem_wb_if,
|
||||
// EXEC Unit WB info
|
||||
VX_inst_exec_wb_if vx_inst_exec_wb,
|
||||
VX_inst_exec_wb_if inst_exec_wb_if,
|
||||
// CSR Unit WB info
|
||||
VX_csr_wb_if vx_csr_wb,
|
||||
VX_csr_wb_if csr_wb_if,
|
||||
|
||||
// Actual WB to GPR
|
||||
VX_wb_if vx_writeback_if,
|
||||
output wire no_slot_mem,
|
||||
output wire no_slot_exec,
|
||||
output wire no_slot_csr
|
||||
VX_wb_if writeback_if,
|
||||
output wire no_slot_mem,
|
||||
output wire no_slot_exec,
|
||||
output wire no_slot_csr
|
||||
);
|
||||
|
||||
VX_wb_if vx_writeback_tempp();
|
||||
VX_wb_if writeback_tempp_if();
|
||||
|
||||
wire exec_wb = (vx_inst_exec_wb.wb != 0) && (|vx_inst_exec_wb.wb_valid);
|
||||
wire mem_wb = (vx_mem_wb.wb != 0) && (|vx_mem_wb.wb_valid);
|
||||
wire csr_wb = (vx_csr_wb.wb != 0) && (|vx_csr_wb.valid);
|
||||
wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.wb_valid);
|
||||
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid);
|
||||
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
|
||||
|
||||
|
||||
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
|
||||
assign no_slot_csr = csr_wb && (exec_wb);
|
||||
assign no_slot_exec = 0;
|
||||
|
||||
assign vx_writeback_tempp.write_data = exec_wb ? vx_inst_exec_wb.alu_result :
|
||||
csr_wb ? vx_csr_wb.csr_result :
|
||||
mem_wb ? vx_mem_wb.loaded_data :
|
||||
assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result :
|
||||
csr_wb ? csr_wb_if.csr_result :
|
||||
mem_wb ? mem_wb_if.loaded_data :
|
||||
0;
|
||||
|
||||
|
||||
assign vx_writeback_tempp.wb_valid = exec_wb ? vx_inst_exec_wb.wb_valid :
|
||||
csr_wb ? vx_csr_wb.valid :
|
||||
mem_wb ? vx_mem_wb.wb_valid :
|
||||
assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid :
|
||||
csr_wb ? csr_wb_if.valid :
|
||||
mem_wb ? mem_wb_if.wb_valid :
|
||||
0;
|
||||
|
||||
assign vx_writeback_tempp.rd = exec_wb ? vx_inst_exec_wb.rd :
|
||||
csr_wb ? vx_csr_wb.rd :
|
||||
mem_wb ? vx_mem_wb.rd :
|
||||
assign writeback_tempp_if.rd = exec_wb ? inst_exec_wb_if.rd :
|
||||
csr_wb ? csr_wb_if.rd :
|
||||
mem_wb ? mem_wb_if.rd :
|
||||
0;
|
||||
|
||||
assign vx_writeback_tempp.wb = exec_wb ? vx_inst_exec_wb.wb :
|
||||
csr_wb ? vx_csr_wb.wb :
|
||||
mem_wb ? vx_mem_wb.wb :
|
||||
assign writeback_tempp_if.wb = exec_wb ? inst_exec_wb_if.wb :
|
||||
csr_wb ? csr_wb_if.wb :
|
||||
mem_wb ? mem_wb_if.wb :
|
||||
0;
|
||||
|
||||
assign vx_writeback_tempp.wb_warp_num = exec_wb ? vx_inst_exec_wb.wb_warp_num :
|
||||
csr_wb ? vx_csr_wb.warp_num :
|
||||
mem_wb ? vx_mem_wb.wb_warp_num :
|
||||
assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num :
|
||||
csr_wb ? csr_wb_if.warp_num :
|
||||
mem_wb ? mem_wb_if.wb_warp_num :
|
||||
0;
|
||||
|
||||
|
||||
|
||||
assign vx_writeback_tempp.wb_pc = exec_wb ? vx_inst_exec_wb.exec_wb_pc :
|
||||
assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc :
|
||||
csr_wb ? 32'hdeadbeef :
|
||||
mem_wb ? vx_mem_wb.mem_wb_pc :
|
||||
mem_wb ? mem_wb_if.mem_wb_pc :
|
||||
32'hdeadbeef;
|
||||
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] use_wb_data;
|
||||
|
@ -71,19 +70,19 @@ module VX_writeback (
|
|||
.reset(reset),
|
||||
.stall(zero),
|
||||
.flush(zero),
|
||||
.in ({vx_writeback_tempp.write_data, vx_writeback_tempp.wb_valid, vx_writeback_tempp.rd, vx_writeback_tempp.wb, vx_writeback_tempp.wb_warp_num, vx_writeback_tempp.wb_pc}),
|
||||
.out ({use_wb_data , vx_writeback_if.wb_valid, vx_writeback_if.rd, vx_writeback_if.wb, vx_writeback_if.wb_warp_num, vx_writeback_if.wb_pc})
|
||||
.in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}),
|
||||
.out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc})
|
||||
);
|
||||
|
||||
|
||||
reg[31:0] last_data_wb /* verilator public */ ;
|
||||
always @(posedge clk) begin
|
||||
if ((|vx_writeback_if.wb_valid) && (vx_writeback_if.wb != 0) && (vx_writeback_if.rd == 28)) begin
|
||||
if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
|
||||
last_data_wb <= use_wb_data[0];
|
||||
end
|
||||
end
|
||||
|
||||
assign vx_writeback_if.write_data = use_wb_data;
|
||||
assign writeback_if.write_data = use_wb_data;
|
||||
|
||||
endmodule : VX_writeback
|
||||
|
||||
|
|
166
hw/rtl/Vortex.v
166
hw/rtl/Vortex.v
|
@ -99,106 +99,106 @@ module Vortex
|
|||
wire schedule_delay;
|
||||
|
||||
// Dcache Interface
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_qual();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if();
|
||||
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_res();
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_res_if();
|
||||
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_valid = dram_rsp_valid;
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_addr = dram_rsp_addr;
|
||||
assign gpu_dcache_dram_res_if.dram_rsp_valid = dram_rsp_valid;
|
||||
assign gpu_dcache_dram_res_if.dram_rsp_addr = dram_rsp_addr;
|
||||
|
||||
assign dram_req_write = vx_gpu_dcache_dram_req.dram_req_write;
|
||||
assign dram_req_read = vx_gpu_dcache_dram_req.dram_req_read;
|
||||
assign dram_req_addr = vx_gpu_dcache_dram_req.dram_req_addr;
|
||||
assign dram_rsp_ready = vx_gpu_dcache_dram_req.dram_rsp_ready;
|
||||
assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write;
|
||||
assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read;
|
||||
assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr;
|
||||
assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready;
|
||||
|
||||
assign vx_gpu_dcache_dram_req.dram_req_full = dram_req_full;
|
||||
assign gpu_dcache_dram_req_if.dram_req_full = dram_req_full;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
|
||||
assign vx_gpu_dcache_dram_res.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = vx_gpu_dcache_dram_req.dram_req_data[i];
|
||||
assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire temp_io_valid = (!memory_delay)
|
||||
&& (|vx_dcache_req.core_req_valid)
|
||||
&& (vx_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (vx_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
&& (|dcache_req_if.core_req_valid)
|
||||
&& (dcache_req_if.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (dcache_req_if.core_req_addr[0] == 32'h00010000);
|
||||
|
||||
wire[31:0] temp_io_data = vx_dcache_req.core_req_writedata[0];
|
||||
wire[31:0] temp_io_data = dcache_req_if.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
|
||||
assign vx_dcache_req_qual.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign vx_dcache_req_qual.core_req_addr = vx_dcache_req.core_req_addr;
|
||||
assign vx_dcache_req_qual.core_req_writedata = vx_dcache_req.core_req_writedata;
|
||||
assign vx_dcache_req_qual.core_req_mem_read = vx_dcache_req.core_req_mem_read;
|
||||
assign vx_dcache_req_qual.core_req_mem_write = vx_dcache_req.core_req_mem_write;
|
||||
assign vx_dcache_req_qual.core_req_rd = vx_dcache_req.core_req_rd;
|
||||
assign vx_dcache_req_qual.core_req_wb = vx_dcache_req.core_req_wb;
|
||||
assign vx_dcache_req_qual.core_req_warp_num = vx_dcache_req.core_req_warp_num;
|
||||
assign vx_dcache_req_qual.core_req_pc = vx_dcache_req.core_req_pc;
|
||||
assign vx_dcache_req_qual.core_no_wb_slot = vx_dcache_req.core_no_wb_slot;
|
||||
assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_qual_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_qual_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
|
||||
assign dcache_req_qual_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
|
||||
assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_qual_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_rsp();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_req();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
|
||||
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_res();
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_res_if();
|
||||
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_valid = I_dram_rsp_valid;
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_addr = I_dram_rsp_addr;
|
||||
assign gpu_icache_dram_res_if.dram_rsp_valid = I_dram_rsp_valid;
|
||||
assign gpu_icache_dram_res_if.dram_rsp_addr = I_dram_rsp_addr;
|
||||
|
||||
assign I_dram_req_write = vx_gpu_icache_dram_req.dram_req_write;
|
||||
assign I_dram_req_read = vx_gpu_icache_dram_req.dram_req_read;
|
||||
assign I_dram_req_addr = vx_gpu_icache_dram_req.dram_req_addr;
|
||||
assign I_dram_rsp_ready = vx_gpu_icache_dram_req.dram_rsp_ready;
|
||||
assign I_dram_req_write = gpu_icache_dram_req_if.dram_req_write;
|
||||
assign I_dram_req_read = gpu_icache_dram_req_if.dram_req_read;
|
||||
assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr;
|
||||
assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready;
|
||||
|
||||
assign vx_gpu_icache_dram_req.dram_req_full = I_dram_req_full;
|
||||
assign gpu_icache_dram_req_if.dram_req_full = I_dram_req_full;
|
||||
|
||||
genvar j;
|
||||
generate
|
||||
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
|
||||
assign vx_gpu_icache_dram_res.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
|
||||
assign I_dram_req_data[j * 32 +: 32] = vx_gpu_icache_dram_req.dram_req_data[j];
|
||||
assign gpu_icache_dram_res_if.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
|
||||
assign I_dram_req_data[j * 32 +: 32] = gpu_icache_dram_req_if.dram_req_data[j];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Front-end to Back-end
|
||||
VX_frE_to_bckE_req_if vx_bckE_req(); // New instruction request to EXE/MEM
|
||||
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
|
||||
|
||||
// Back-end to Front-end
|
||||
VX_wb_if vx_writeback_if(); // Writeback to GPRs
|
||||
VX_branch_response_if vx_branch_rsp(); // Branch Resolution to Fetch
|
||||
VX_jal_response_if vx_jal_rsp(); // Jump resolution to Fetch
|
||||
VX_wb_if writeback_if(); // Writeback to GPRs
|
||||
VX_branch_response_if branch_rsp_if(); // Branch Resolution to Fetch
|
||||
VX_jal_response_if jal_rsp_if(); // Jump resolution to Fetch
|
||||
|
||||
// CSR Buses
|
||||
// VX_csr_write_request_if vx_csr_w_req();
|
||||
// VX_csr_write_request_if csr_w_req_if();
|
||||
|
||||
VX_warp_ctl_if vx_warp_ctl();
|
||||
VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req();
|
||||
VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if();
|
||||
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if();
|
||||
|
||||
assign vx_gpu_dcache_snp_req.snp_req_valid = snp_req_valid;
|
||||
assign vx_gpu_dcache_snp_req.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_full = vx_gpu_dcache_snp_req.snp_req_full;
|
||||
assign gpu_dcache_snp_req_if.snp_req_valid = snp_req_valid;
|
||||
assign gpu_dcache_snp_req_if.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_full = gpu_dcache_snp_req_if.snp_req_full;
|
||||
|
||||
VX_front_end vx_front_end(
|
||||
VX_front_end front_end(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
.vx_icache_req (vx_icache_req),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.fetch_ebreak (out_ebreak)
|
||||
);
|
||||
|
||||
|
@ -208,56 +208,56 @@ VX_scheduler schedule(
|
|||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_writeback_if (vx_writeback_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.writeback_if (writeback_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
|
||||
VX_back_end #(.CORE_ID(CORE_ID)) back_end(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.vx_warp_ctl (vx_warp_ctl),
|
||||
.vx_bckE_req (vx_bckE_req),
|
||||
.vx_jal_rsp (vx_jal_rsp),
|
||||
.vx_branch_rsp (vx_branch_rsp),
|
||||
.vx_dcache_rsp (vx_dcache_rsp),
|
||||
.vx_dcache_req (vx_dcache_req),
|
||||
.vx_writeback_if (vx_writeback_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.writeback_if (writeback_if),
|
||||
.out_mem_delay (memory_delay),
|
||||
.out_exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
VX_dmem_controller vx_dmem_controller(
|
||||
VX_dmem_controller dmem_controller(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Dram <-> Dcache
|
||||
.vx_gpu_dcache_dram_req (vx_gpu_dcache_dram_req),
|
||||
.vx_gpu_dcache_dram_res (vx_gpu_dcache_dram_res),
|
||||
.vx_gpu_dcache_snp_req (vx_gpu_dcache_snp_req),
|
||||
.gpu_dcache_dram_req_if (gpu_dcache_dram_req_if),
|
||||
.gpu_dcache_dram_res_if (gpu_dcache_dram_res_if),
|
||||
.gpu_dcache_snp_req_if (gpu_dcache_snp_req_if),
|
||||
|
||||
// Dram <-> Icache
|
||||
.vx_gpu_icache_dram_req (vx_gpu_icache_dram_req),
|
||||
.vx_gpu_icache_dram_res (vx_gpu_icache_dram_res),
|
||||
.vx_gpu_icache_snp_req (vx_gpu_icache_snp_req),
|
||||
.gpu_icache_dram_req_if (gpu_icache_dram_req_if),
|
||||
.gpu_icache_dram_res_if (gpu_icache_dram_res_if),
|
||||
.gpu_icache_snp_req_if (gpu_icache_snp_req_if),
|
||||
|
||||
// Core <-> Icache
|
||||
.vx_icache_req (vx_icache_req),
|
||||
.vx_icache_rsp (vx_icache_rsp),
|
||||
.icache_req_if (icache_req_if),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
|
||||
// Core <-> Dcache
|
||||
.vx_dcache_req (vx_dcache_req_qual),
|
||||
.vx_dcache_rsp (vx_dcache_rsp)
|
||||
.dcache_req_if (dcache_req_qual_if),
|
||||
.dcache_rsp_if (dcache_rsp_if)
|
||||
);
|
||||
|
||||
// VX_csr_handler vx_csr_handler(
|
||||
// VX_csr_handler csr_handler(
|
||||
// .clk (clk),
|
||||
// .in_decode_csr_address(decode_csr_address),
|
||||
// .vx_csr_w_req (vx_csr_w_req),
|
||||
// .in_wb_valid (vx_writeback_if.wb_valid[0]),
|
||||
// .csr_w_req_if (csr_w_req_if),
|
||||
// .in_wb_valid (writeback_if.wb_valid[0]),
|
||||
// .out_decode_csr_data (csr_decode_csr_data)
|
||||
// );
|
||||
|
||||
|
|
|
@ -449,7 +449,7 @@ module VX_bank #(
|
|||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_tag_data_access (
|
||||
) tag_data_access (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
|
@ -477,7 +477,7 @@ module VX_bank #(
|
|||
.miss_st1e (miss_st1e),
|
||||
.dirty_st1e (dirty_st1e),
|
||||
.fill_saw_dirty_st1e(fill_saw_dirty_st1e)
|
||||
);
|
||||
);
|
||||
|
||||
wire qual_valid_st1e_2 = valid_st1[STAGE_1_CYCLES-1] && !is_fill_st1[STAGE_1_CYCLES-1];
|
||||
|
||||
|
@ -581,7 +581,7 @@ module VX_bank #(
|
|||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_fill_invalidator (
|
||||
) fill_invalidator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.possible_fill (possible_fill),
|
||||
|
|
|
@ -157,7 +157,7 @@ module VX_cache #(
|
|||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_cache_dram_req_arb (
|
||||
) cache_dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_full (dfqq_full),
|
||||
|
@ -191,7 +191,7 @@ module VX_cache #(
|
|||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_cache_core_req_bank_sell (
|
||||
) cache_core_req_bank_sell (
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_addr (core_req_addr),
|
||||
.per_bank_valids (per_bank_valids)
|
||||
|
@ -215,7 +215,7 @@ module VX_cache #(
|
|||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_cache_core_wb_sel_merge (
|
||||
) cache_core_wb_sel_merge (
|
||||
.per_bank_wb_valid (per_bank_wb_valid),
|
||||
.per_bank_wb_tid (per_bank_wb_tid),
|
||||
.per_bank_wb_rd (per_bank_wb_rd),
|
||||
|
@ -239,7 +239,7 @@ module VX_cache #(
|
|||
// Snoop Forward Logic
|
||||
VX_snp_fwd_arb #(
|
||||
.NUM_BANKS(NUM_BANKS)
|
||||
) vx_snp_fwd_arb(
|
||||
) snp_fwd_arb(
|
||||
.per_bank_snp_fwd (per_bank_snp_fwd),
|
||||
.per_bank_snp_fwd_addr(per_bank_snp_fwd_addr),
|
||||
.per_bank_snp_fwd_pop (per_bank_snp_fwd_pop),
|
||||
|
|
|
@ -94,7 +94,7 @@ module VX_cache_dfq_queue #(
|
|||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_bank (
|
||||
) sel_bank (
|
||||
.valids(qual_bank_dram_fill_req),
|
||||
.index (qual_request_index),
|
||||
.found (qual_has_request)
|
||||
|
|
|
@ -102,7 +102,7 @@ module VX_cache_dram_req_arb #(
|
|||
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_full; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req_valid);
|
||||
|
||||
VX_cache_dfq_queue vx_cache_dfq_queue(
|
||||
VX_cache_dfq_queue cache_dfq_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_push (dfqq_push),
|
||||
|
@ -121,7 +121,7 @@ module VX_cache_dram_req_arb #(
|
|||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_dwb (
|
||||
) sel_dwb (
|
||||
.valids(use_wb_valid),
|
||||
.index (dwb_bank),
|
||||
.found (dwb_valid)
|
||||
|
|
|
@ -142,10 +142,11 @@ module VX_cache_req_queue #(
|
|||
assign qual_pc = use_per_pc;
|
||||
|
||||
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
|
||||
wire qual_has_request;
|
||||
wire qual_has_request;
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_REQUESTS)
|
||||
) vx_sel_bank (
|
||||
) sel_bank (
|
||||
.valids(qual_valids),
|
||||
.index (qual_request_index),
|
||||
.found (qual_has_request)
|
||||
|
|
|
@ -80,7 +80,7 @@ module VX_cache_wb_sel_merge #(
|
|||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_bank (
|
||||
) sel_bank (
|
||||
.valids(per_bank_wb_valid),
|
||||
.index (main_bank_index),
|
||||
.found (found_bank)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_dcache_llv_resp_bank_sel
|
||||
#(
|
||||
module VX_dcache_llv_resp_bank_sel #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE_BYTES = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
|
@ -15,8 +14,7 @@ module VX_dcache_llv_resp_bank_sel
|
|||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Request Queue Size
|
||||
parameter REQQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
|
@ -26,7 +24,7 @@ module VX_dcache_llv_resp_bank_sel
|
|||
// Snoop Req Queue
|
||||
parameter SNRQ_SIZE = 8,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
|
@ -39,12 +37,9 @@ module VX_dcache_llv_resp_bank_sel
|
|||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
parameter FILL_INVALIDAOR_SIZE = 16,
|
||||
|
||||
// Dram knobs
|
||||
// Dram knobs
|
||||
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
|
||||
|
||||
|
||||
)
|
||||
(
|
||||
) (
|
||||
output reg [NUM_BANKS-1:0] per_bank_llvq_pop,
|
||||
input wire[NUM_BANKS-1:0] per_bank_llvq_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_rsp_addr,
|
||||
|
@ -55,20 +50,19 @@ module VX_dcache_llv_resp_bank_sel
|
|||
output reg[NUM_REQUESTS-1:0] llvq_valid,
|
||||
output reg[NUM_REQUESTS-1:0][31:0] llvq_rsp_addr,
|
||||
output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data
|
||||
|
||||
|
||||
);
|
||||
|
||||
wire [(`LOG2UP(NUM_BANKS))-1:0] main_bank_index;
|
||||
wire found_bank;
|
||||
wire found_bank;
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_BANKS)) vx_sel_bank(
|
||||
.valids(per_bank_llvq_valid),
|
||||
.index (main_bank_index),
|
||||
.found (found_bank)
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) sel_bank(
|
||||
.valids(per_bank_llvq_valid),
|
||||
.index (main_bank_index),
|
||||
.found (found_bank)
|
||||
);
|
||||
|
||||
|
||||
always @(*) begin
|
||||
llvq_valid = 0;
|
||||
llvq_rsp_addr = 0;
|
||||
|
|
|
@ -86,7 +86,7 @@ module VX_fill_invalidator
|
|||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(FILL_INVALIDAOR_SIZE)
|
||||
) vx_sel_bank (
|
||||
) sel_bank (
|
||||
.valids(~fills_active),
|
||||
.index (enqueue_index),
|
||||
.found (enqueue_found)
|
||||
|
@ -136,7 +136,7 @@ module VX_fill_invalidator
|
|||
// wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
|
||||
// wire enqueue_found;
|
||||
|
||||
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) vx_sel_bank(
|
||||
// VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) sel_bank(
|
||||
// .valids(~fills_active),
|
||||
// .index (enqueue_index),
|
||||
// .found (enqueue_found)
|
||||
|
|
|
@ -22,7 +22,7 @@ module VX_snp_fwd_arb
|
|||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
) vx_sel_ffsq(
|
||||
) sel_ffsq (
|
||||
.valids(qual_per_bank_snp_fwd),
|
||||
.index (fsq_bank),
|
||||
.found (fsq_valid)
|
||||
|
|
|
@ -110,7 +110,7 @@ module VX_tag_data_access #(
|
|||
.LLVQ_SIZE (LLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) vx_tag_data_structure (
|
||||
) tag_data_structure (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
`include "../VX_define.vh"
|
||||
|
||||
module VX_d_e_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_branch_stall,
|
||||
input wire in_freeze,
|
||||
VX_frE_to_bckE_req_if vx_frE_to_bckE_req,
|
||||
VX_frE_to_bckE_req_if vx_bckE_req
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_branch_stall,
|
||||
input wire in_freeze,
|
||||
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
|
||||
VX_frE_to_bckE_req_if bckE_req_if
|
||||
);
|
||||
|
||||
wire stall = in_freeze;
|
||||
|
@ -19,8 +19,8 @@ module VX_d_e_reg (
|
|||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({vx_frE_to_bckE_req.csr_address, vx_frE_to_bckE_req.jalQual, vx_frE_to_bckE_req.ebreak, vx_frE_to_bckE_req.is_csr, vx_frE_to_bckE_req.csr_immed, vx_frE_to_bckE_req.csr_mask, vx_frE_to_bckE_req.rd, vx_frE_to_bckE_req.rs1, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.alu_op, vx_frE_to_bckE_req.wb, vx_frE_to_bckE_req.rs2_src, vx_frE_to_bckE_req.itype_immed, vx_frE_to_bckE_req.mem_read, vx_frE_to_bckE_req.mem_write, vx_frE_to_bckE_req.branch_type, vx_frE_to_bckE_req.upper_immed, vx_frE_to_bckE_req.curr_PC, vx_frE_to_bckE_req.jal, vx_frE_to_bckE_req.jal_offset, vx_frE_to_bckE_req.PC_next, vx_frE_to_bckE_req.valid, vx_frE_to_bckE_req.warp_num, vx_frE_to_bckE_req.is_wspawn, vx_frE_to_bckE_req.is_tmc, vx_frE_to_bckE_req.is_split, vx_frE_to_bckE_req.is_barrier}),
|
||||
.out ({vx_bckE_req.csr_address , vx_bckE_req.jalQual , vx_bckE_req.ebreak ,vx_bckE_req.is_csr , vx_bckE_req.csr_immed , vx_bckE_req.csr_mask , vx_bckE_req.rd , vx_bckE_req.rs1 , vx_bckE_req.rs2 , vx_bckE_req.alu_op , vx_bckE_req.wb , vx_bckE_req.rs2_src , vx_bckE_req.itype_immed , vx_bckE_req.mem_read , vx_bckE_req.mem_write , vx_bckE_req.branch_type , vx_bckE_req.upper_immed , vx_bckE_req.curr_PC , vx_bckE_req.jal , vx_bckE_req.jal_offset , vx_bckE_req.PC_next , vx_bckE_req.valid , vx_bckE_req.warp_num , vx_bckE_req.is_wspawn , vx_bckE_req.is_tmc , vx_bckE_req.is_split , vx_bckE_req.is_barrier })
|
||||
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
|
||||
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -28,21 +28,22 @@ module VX_priority_encoder_sm
|
|||
reg[`NUM_THREADS-1:0] left_requests;
|
||||
reg[`NUM_THREADS-1:0] serviced;
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0] use_valid;
|
||||
|
||||
|
||||
wire requests_left = (|left_requests);
|
||||
|
||||
assign use_valid = (requests_left) ? left_requests : in_valid;
|
||||
|
||||
|
||||
wire[NB:0][`NUM_THREADS-1:0] bank_valids;
|
||||
VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid(
|
||||
|
||||
VX_bank_valids #(
|
||||
.NB(NB),
|
||||
.BITS_PER_BANK(BITS_PER_BANK)
|
||||
) bank_valid (
|
||||
.in_valids(use_valid),
|
||||
.in_addr(in_address),
|
||||
.bank_valids(bank_valids)
|
||||
);
|
||||
);
|
||||
|
||||
wire[NB:0] more_than_one_valid;
|
||||
|
||||
|
@ -73,11 +74,13 @@ module VX_priority_encoder_sm
|
|||
generate
|
||||
for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1) begin : encoders
|
||||
|
||||
VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder(
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_REQ)
|
||||
) priority_encoder (
|
||||
.valids(bank_valids[curr_bank_o]),
|
||||
.index(internal_req_num[curr_bank_o]),
|
||||
.found(internal_out_valid[curr_bank_o])
|
||||
);
|
||||
);
|
||||
assign out_address[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_address[internal_req_num[curr_bank_o]] : 0;
|
||||
assign out_data[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_data[internal_req_num[curr_bank_o]] : 0;
|
||||
end
|
||||
|
@ -91,11 +94,9 @@ module VX_priority_encoder_sm
|
|||
end
|
||||
end
|
||||
|
||||
|
||||
assign req_num = internal_req_num;
|
||||
assign out_valid = internal_out_valid;
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0] serviced_qual = in_valid & (serviced);
|
||||
|
||||
wire[`NUM_THREADS-1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual);
|
||||
|
|
|
@ -1,23 +1,21 @@
|
|||
`include "../VX_define.vh"
|
||||
|
||||
module VX_shared_memory
|
||||
#(
|
||||
parameter SM_SIZE = 4096, // Bytes
|
||||
parameter SM_BANKS = 4,
|
||||
parameter SM_BYTES_PER_READ = 16,
|
||||
parameter SM_WORDS_PER_READ = 4,
|
||||
parameter SM_LOG_WORDS_PER_READ = 2,
|
||||
parameter SM_HEIGHT = 128, // Bytes
|
||||
parameter SM_BANK_OFFSET_START = 2,
|
||||
parameter SM_BANK_OFFSET_END = 4,
|
||||
parameter SM_BLOCK_OFFSET_START = 5,
|
||||
parameter SM_BLOCK_OFFSET_END = 6,
|
||||
parameter SM_INDEX_START = 7,
|
||||
parameter SM_INDEX_END = 13,
|
||||
parameter NUM_REQ = 4,
|
||||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
module VX_shared_memory #(
|
||||
parameter SM_SIZE = 4096, // Bytes
|
||||
parameter SM_BANKS = 4,
|
||||
parameter SM_BYTES_PER_READ = 16,
|
||||
parameter SM_WORDS_PER_READ = 4,
|
||||
parameter SM_LOG_WORDS_PER_READ = 2,
|
||||
parameter SM_HEIGHT = 128, // Bytes
|
||||
parameter SM_BANK_OFFSET_START = 2,
|
||||
parameter SM_BANK_OFFSET_END = 4,
|
||||
parameter SM_BLOCK_OFFSET_START = 5,
|
||||
parameter SM_BLOCK_OFFSET_END = 6,
|
||||
parameter SM_INDEX_START = 7,
|
||||
parameter SM_INDEX_END = 13,
|
||||
parameter NUM_REQ = 4,
|
||||
parameter BITS_PER_BANK = 3
|
||||
) (
|
||||
//INPUTS
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -30,148 +28,144 @@ module VX_shared_memory
|
|||
output wire[`NUM_THREADS-1:0] out_valid,
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_data,
|
||||
output wire stall
|
||||
);
|
||||
|
||||
//reg [NB:0][31:0] temp_address;
|
||||
//reg [NB:0][31:0] temp_in_data;
|
||||
//reg [NB:0] temp_in_valid;
|
||||
reg [SM_BANKS - 1:0][31:0] temp_address;
|
||||
reg [SM_BANKS - 1:0][31:0] temp_in_data;
|
||||
reg [SM_BANKS - 1:0] temp_in_valid;
|
||||
|
||||
reg [`NUM_THREADS-1:0] temp_out_valid;
|
||||
reg [`NUM_THREADS-1:0][31:0] temp_out_data;
|
||||
|
||||
//reg [NB:0][6:0] block_addr;
|
||||
//reg [NB:0][3:0][31:0] block_wdata;
|
||||
//reg [NB:0][3:0][31:0] block_rdata;
|
||||
//reg [NB:0][1:0] block_we;
|
||||
reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata;
|
||||
reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
|
||||
|
||||
wire send_data;
|
||||
|
||||
//reg [NB:0][1:0] req_num;
|
||||
reg [SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this
|
||||
|
||||
wire [`NUM_THREADS-1:0] orig_in_valid;
|
||||
|
||||
genvar f;
|
||||
generate
|
||||
for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup
|
||||
assign orig_in_valid[f] = in_valid[f];
|
||||
end
|
||||
|
||||
assign out_valid = send_data ? temp_out_valid : 0;
|
||||
assign out_data = send_data ? temp_out_data : 0;
|
||||
endgenerate
|
||||
|
||||
VX_priority_encoder_sm #(
|
||||
.NB(SM_BANKS - 1),
|
||||
.BITS_PER_BANK(BITS_PER_BANK),
|
||||
.NUM_REQ(NUM_REQ)
|
||||
) priority_encoder_sm (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.in_valid(orig_in_valid),
|
||||
.in_address(in_address),
|
||||
.in_data(in_data),
|
||||
|
||||
.out_valid(temp_in_valid),
|
||||
.out_address(temp_address),
|
||||
.out_data(temp_in_data),
|
||||
|
||||
.req_num(req_num),
|
||||
.stall(stall),
|
||||
.send_data(send_data)
|
||||
);
|
||||
|
||||
//reg[NB:0][31:0] temp_address;
|
||||
//reg[NB:0][31:0] temp_in_data;
|
||||
//reg[NB:0] temp_in_valid;
|
||||
reg[SM_BANKS - 1:0][31:0] temp_address;
|
||||
reg[SM_BANKS - 1:0][31:0] temp_in_data;
|
||||
reg[SM_BANKS - 1:0] temp_in_valid;
|
||||
genvar j;
|
||||
integer i;
|
||||
generate
|
||||
for (j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks
|
||||
|
||||
reg[`NUM_THREADS-1:0] temp_out_valid;
|
||||
reg[`NUM_THREADS-1:0][31:0] temp_out_data;
|
||||
wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j];
|
||||
|
||||
//reg [NB:0][6:0] block_addr;
|
||||
//reg [NB:0][3:0][31:0] block_wdata;
|
||||
//reg [NB:0][3:0][31:0] block_rdata;
|
||||
//reg [NB:0][1:0] block_we;
|
||||
reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata;
|
||||
reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata;
|
||||
reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
|
||||
VX_shared_memory_block #(
|
||||
.SMB_HEIGHT(SM_HEIGHT),
|
||||
.SMB_WORDS_PER_READ(SM_WORDS_PER_READ),
|
||||
.SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ)
|
||||
) shared_memory_block (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.addr (block_addr[j]),
|
||||
.wdata (block_wdata[j]),
|
||||
.we (block_we[j]),
|
||||
.shm_write(shm_write),
|
||||
.data_out (block_rdata[j])
|
||||
);
|
||||
end
|
||||
|
||||
wire send_data;
|
||||
|
||||
//reg[NB:0][1:0] req_num;
|
||||
reg[SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this
|
||||
|
||||
wire [`NUM_THREADS-1:0] orig_in_valid;
|
||||
|
||||
genvar f;
|
||||
generate
|
||||
for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup
|
||||
assign orig_in_valid[f] = in_valid[f];
|
||||
end
|
||||
|
||||
assign out_valid = send_data ? temp_out_valid : 0;
|
||||
assign out_data = send_data ? temp_out_data : 0;
|
||||
endgenerate
|
||||
|
||||
|
||||
//VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
|
||||
VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm(
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.in_valid(orig_in_valid),
|
||||
.in_address(in_address),
|
||||
.in_data(in_data),
|
||||
|
||||
.out_valid(temp_in_valid),
|
||||
.out_address(temp_address),
|
||||
.out_data(temp_in_data),
|
||||
|
||||
.req_num(req_num),
|
||||
.stall(stall),
|
||||
.send_data(send_data)
|
||||
);
|
||||
|
||||
|
||||
genvar j;
|
||||
integer i;
|
||||
generate
|
||||
//for(j=0; j<= NB; j=j+1) begin : sm_mem_block
|
||||
for(j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks
|
||||
|
||||
wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j];
|
||||
|
||||
VX_shared_memory_block#
|
||||
(
|
||||
.SMB_HEIGHT(SM_HEIGHT),
|
||||
.SMB_WORDS_PER_READ(SM_WORDS_PER_READ),
|
||||
.SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ)
|
||||
) vx_shared_memory_block
|
||||
(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.addr (block_addr[j]),
|
||||
.wdata (block_wdata[j]),
|
||||
.we (block_we[j]),
|
||||
.shm_write(shm_write),
|
||||
.data_out (block_rdata[j])
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
always @(*) begin
|
||||
block_addr = 0;
|
||||
block_we = 0;
|
||||
block_wdata = 0;
|
||||
//for(i = 0; i <= NB; i = i+1) begin
|
||||
for(i = 0; i <= SM_BANKS - 1; i = i+1) begin
|
||||
if(temp_in_valid[i] == 1'b1) begin
|
||||
//1. Check if the request is actually to the shared memory
|
||||
if((temp_address[i][31:24]) == 8'hFF) begin
|
||||
// STORES
|
||||
if(mem_write != `NO_MEM_WRITE) begin
|
||||
if(mem_write == `SB_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if(mem_write == `SH_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if(mem_write == `SW_MEM_WRITE) begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//block_we[i] = temp_address[i][6:5];
|
||||
//block_wdata[i][temp_address[i][6:5]] = temp_in_data[i];
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START];
|
||||
block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i];
|
||||
end
|
||||
end
|
||||
//LOADS
|
||||
else if(mem_read != `NO_MEM_READ) begin
|
||||
if(mem_read == `LB_MEM_READ) begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LH_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LW_MEM_READ)
|
||||
begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]];
|
||||
//temp_out_valid[req_num[i]] = 1'b1;
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]];
|
||||
temp_out_valid[req_num[i]] = 1'b1;
|
||||
end
|
||||
else if (mem_read == `LBU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LHU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
always @(*) begin
|
||||
block_addr = 0;
|
||||
block_we = 0;
|
||||
block_wdata = 0;
|
||||
//for(i = 0; i <= NB; i = i+1) begin
|
||||
for (i = 0; i <= SM_BANKS - 1; i = i+1) begin
|
||||
if (temp_in_valid[i] == 1'b1) begin
|
||||
//1. Check if the request is actually to the shared memory
|
||||
if ((temp_address[i][31:24]) == 8'hFF) begin
|
||||
// STORES
|
||||
if (mem_write != `NO_MEM_WRITE) begin
|
||||
if (mem_write == `SB_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_write == `SH_MEM_WRITE) begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_write == `SW_MEM_WRITE) begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//block_we[i] = temp_address[i][6:5];
|
||||
//block_wdata[i][temp_address[i][6:5]] = temp_in_data[i];
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START];
|
||||
block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i];
|
||||
end
|
||||
end
|
||||
//LOADS
|
||||
else if(mem_read != `NO_MEM_READ) begin
|
||||
if(mem_read == `LB_MEM_READ) begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LH_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LW_MEM_READ)
|
||||
begin
|
||||
//block_addr[i] = temp_address[i][13:7];
|
||||
//temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]];
|
||||
//temp_out_valid[req_num[i]] = 1'b1;
|
||||
block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START];
|
||||
temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]];
|
||||
temp_out_valid[req_num[i]] = 1'b1;
|
||||
end
|
||||
else if (mem_read == `LBU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
else if (mem_read == `LHU_MEM_READ)
|
||||
begin
|
||||
//TODO
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue