mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
rtl refactoring
This commit is contained in:
parent
f142afac80
commit
b7e892ee16
39 changed files with 393 additions and 425 deletions
|
@ -90,7 +90,7 @@ module VX_alu_unit (
|
|||
assign alu_stall = inst_delay_stall;
|
||||
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
case (alu_op)
|
||||
`DIV,
|
||||
`DIVU,
|
||||
`REM,
|
||||
|
@ -136,7 +136,7 @@ module VX_alu_unit (
|
|||
assign upper_immed = {upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
case (alu_op)
|
||||
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
|
@ -177,7 +177,7 @@ module VX_alu_unit (
|
|||
assign upper_immed_s = {upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
case (alu_op)
|
||||
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
|
||||
|
|
|
@ -20,7 +20,9 @@ module VX_back_end #(
|
|||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
VX_warp_ctl_if warp_ctl_if
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
VX_wb_if wb_temp_if();
|
||||
|
@ -69,6 +71,8 @@ module VX_back_end #(
|
|||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
|
||||
|
||||
VX_lsu_unit lsu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -81,14 +85,14 @@ module VX_back_end #(
|
|||
);
|
||||
|
||||
VX_exec_unit exec_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.exec_unit_req_if(exec_unit_req_if),
|
||||
.inst_exec_wb_if (inst_exec_wb_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.delay (exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
.inst_exec_wb_if(inst_exec_wb_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.delay (exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
);
|
||||
|
||||
VX_gpu_inst gpu_inst (
|
||||
|
@ -119,6 +123,6 @@ module VX_back_end #(
|
|||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_exec (no_slot_exec),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
);
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -9,14 +9,14 @@ module VX_csr_wrapper (
|
|||
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
|
||||
genvar cur_t, cur_tw;
|
||||
genvar i;
|
||||
generate
|
||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init
|
||||
assign thread_ids[i] = i;
|
||||
end
|
||||
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init
|
||||
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ module VX_dcache_io_arb (
|
|||
|
||||
assign core_req_if.core_req_ready = io_select ? io_core_req_if.core_req_ready : dcache_core_req_if.core_req_ready;
|
||||
|
||||
wire dcache_rsp_valid = (|dcache_core_rsp_if.core_rsp_valid);
|
||||
wire dcache_rsp_valid = (| dcache_core_rsp_if.core_rsp_valid);
|
||||
|
||||
assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_valid : io_core_rsp_if.core_rsp_valid;
|
||||
assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_data : io_core_rsp_if.core_rsp_data;
|
||||
|
|
|
@ -8,18 +8,15 @@ module VX_decode(
|
|||
// Outputs
|
||||
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
|
||||
output wire terminate_sim
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
|
@ -34,7 +31,7 @@ module VX_decode(
|
|||
wire is_auipc;
|
||||
wire is_csr;
|
||||
wire is_csr_immed;
|
||||
wire is_e_inst;
|
||||
wire is_etype;
|
||||
|
||||
wire is_gpgpu;
|
||||
wire is_wspawn;
|
||||
|
@ -47,7 +44,6 @@ module VX_decode(
|
|||
wire[6:0] func7;
|
||||
wire[11:0] u_12;
|
||||
|
||||
|
||||
wire[7:0] jal_b_19_to_12;
|
||||
wire jal_b_11;
|
||||
wire[9:0] jal_b_10_to_1;
|
||||
|
@ -77,11 +73,11 @@ module VX_decode(
|
|||
reg[4:0] alu_op;
|
||||
reg[4:0] mul_alu;
|
||||
reg[19:0] temp_upper_immed;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
|
||||
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
|
||||
|
||||
|
@ -89,12 +85,12 @@ module VX_decode(
|
|||
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
|
||||
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
|
||||
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
|
||||
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
|
||||
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
|
||||
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
|
||||
|
||||
|
@ -110,8 +106,6 @@ module VX_decode(
|
|||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
assign is_e_inst = in_instruction == 32'h00000073;
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||
|
||||
|
@ -132,10 +126,10 @@ module VX_decode(
|
|||
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
||||
assign frE_to_bckE_req_if.is_csr = is_csr;
|
||||
|
||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
|
||||
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
|
@ -145,7 +139,7 @@ module VX_decode(
|
|||
|
||||
// UPPER IMMEDIATE
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
case (curr_opcode)
|
||||
`LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
`AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
|
@ -165,34 +159,34 @@ module VX_decode(
|
|||
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
assign jal_sys_cond1 = func3 == 3'h0;
|
||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
||||
assign jal_sys_cond1 = (func3 == 3'h0);
|
||||
assign jal_sys_cond2 = (u_12 < 12'h2);
|
||||
|
||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||
|
||||
// JAL
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
case (curr_opcode)
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal = 1'b1 && (| in_valid);
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_jal = 1'b1 && (|in_valid);
|
||||
temp_jal = 1'b1 && (| in_valid);
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`SYS_INST:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
|
||||
temp_jal = jal_sys_jal && (|in_valid);
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && (| in_valid)));
|
||||
temp_jal = jal_sys_jal && (| in_valid);
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0 && (|in_valid);
|
||||
temp_jal = 1'b0 && (| in_valid);
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
endcase
|
||||
|
@ -202,12 +196,9 @@ module VX_decode(
|
|||
assign frE_to_bckE_req_if.jal = temp_jal;
|
||||
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
|
||||
|
||||
// wire is_ebreak;
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign frE_to_bckE_req_if.ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
// ecall/ebreak
|
||||
assign is_etype = (curr_opcode == `SYS_INST) && jal_sys_jal;
|
||||
assign frE_to_bckE_req_if.is_etype = is_etype;
|
||||
|
||||
// CSR
|
||||
|
||||
|
@ -222,60 +213,55 @@ module VX_decode(
|
|||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
|
||||
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
endcase
|
||||
case (curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
|
||||
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`B_INST:
|
||||
begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
case(func3)
|
||||
3'h0: temp_branch_type = `BEQ;
|
||||
3'h1: temp_branch_type = `BNE;
|
||||
3'h4: temp_branch_type = `BLT;
|
||||
3'h5: temp_branch_type = `BGT;
|
||||
3'h6: temp_branch_type = `BLTU;
|
||||
3'h7: temp_branch_type = `BGTU;
|
||||
default: temp_branch_type = `NO_BRANCH;
|
||||
endcase
|
||||
end
|
||||
|
||||
`JAL_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
`JALR_INST:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (|in_valid);
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b0 && (|in_valid);
|
||||
end
|
||||
case (curr_opcode)
|
||||
`B_INST: begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
case (func3)
|
||||
3'h0: temp_branch_type = `BEQ;
|
||||
3'h1: temp_branch_type = `BNE;
|
||||
3'h4: temp_branch_type = `BLT;
|
||||
3'h5: temp_branch_type = `BGT;
|
||||
3'h6: temp_branch_type = `BLTU;
|
||||
3'h7: temp_branch_type = `BGTU;
|
||||
default: temp_branch_type = `NO_BRANCH;
|
||||
endcase
|
||||
end
|
||||
`JAL_INST: begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
end
|
||||
`JALR_INST: begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b1 && (| in_valid);
|
||||
end
|
||||
default: begin
|
||||
temp_branch_type = `NO_BRANCH;
|
||||
temp_branch_stall = 1'b0 && (| in_valid);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
|
||||
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
|
||||
assign wstall_if.warp_num = in_warp_num;
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (| in_valid);
|
||||
assign wstall_if.warp_num = in_warp_num;
|
||||
|
||||
always @(*) begin
|
||||
// ALU OP
|
||||
case(func3)
|
||||
case (func3)
|
||||
3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
|
||||
3'h1: alu_op = `SLLA;
|
||||
3'h2: alu_op = `SLT;
|
||||
|
@ -290,7 +276,7 @@ module VX_decode(
|
|||
|
||||
always @(*) begin
|
||||
// ALU OP
|
||||
case(func3)
|
||||
case (func3)
|
||||
3'h0: mul_alu = `MUL;
|
||||
3'h1: mul_alu = `MULH;
|
||||
3'h2: mul_alu = `MULHSU;
|
||||
|
@ -306,7 +292,7 @@ module VX_decode(
|
|||
assign csr_type = func3[1:0];
|
||||
|
||||
always @(*) begin
|
||||
case(csr_type)
|
||||
case (csr_type)
|
||||
2'h1: csr_alu = `CSR_ALU_RW;
|
||||
2'h2: csr_alu = `CSR_ALU_RS;
|
||||
2'h3: csr_alu = `CSR_ALU_RC;
|
||||
|
@ -326,7 +312,7 @@ module VX_decode(
|
|||
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
/*always_comb begin
|
||||
if (1'($time & 1) && |fd_inst_meta_de.valid) begin
|
||||
if (1'($time & 1) && (| fd_inst_meta_de.valid)) begin
|
||||
$display("*** %t: decode: opcode=%h", $time, curr_opcode);
|
||||
end
|
||||
end*/
|
||||
|
|
|
@ -64,7 +64,7 @@ module VX_dmem_ctrl (
|
|||
.PRFQ_SIZE (`SPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
||||
.SNOOP_FORWARDING_ENABLE(0),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (0),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||
|
@ -135,7 +135,7 @@ module VX_dmem_ctrl (
|
|||
.PRFQ_SIZE (`DPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
||||
.SNOOP_FORWARDING_ENABLE(0),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||
|
@ -206,7 +206,7 @@ module VX_dmem_ctrl (
|
|||
.PRFQ_SIZE (`IPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||
.SNOOP_FORWARDING_ENABLE(0),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||
|
|
|
@ -44,27 +44,28 @@ module VX_exec_unit (
|
|||
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0] alu_stall;
|
||||
genvar index_out_reg;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs
|
||||
VX_alu_unit alu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.src_a (in_a_reg_data[index_out_reg]),
|
||||
.src_b (in_b_reg_data[index_out_reg]),
|
||||
.src_a (in_a_reg_data[i]),
|
||||
.src_b (in_b_reg_data[i]),
|
||||
.src_rs2 (in_rs2_src),
|
||||
.itype_immed (in_itype_immed),
|
||||
.upper_immed (in_upper_immed),
|
||||
.alu_op (in_alu_op),
|
||||
.curr_PC (in_curr_PC),
|
||||
.alu_result (alu_result[index_out_reg]),
|
||||
.alu_stall (alu_stall[index_out_reg])
|
||||
.alu_result (alu_result[i]),
|
||||
.alu_stall (alu_stall[i])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire internal_stall;
|
||||
assign internal_stall = |alu_stall;
|
||||
assign internal_stall = (| alu_stall);
|
||||
|
||||
assign delay = no_slot_exec || internal_stall;
|
||||
|
||||
|
@ -98,11 +99,10 @@ module VX_exec_unit (
|
|||
endcase // in_branch_type
|
||||
end
|
||||
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
|
||||
genvar i;
|
||||
|
||||
generate
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin
|
||||
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
|
||||
end
|
||||
endgenerate
|
||||
|
@ -128,7 +128,7 @@ module VX_exec_unit (
|
|||
assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num;
|
||||
|
||||
// Branch rsp
|
||||
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid);
|
||||
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (| exec_unit_req_if.valid);
|
||||
assign branch_rsp_temp_if.branch_dir = temp_branch_dir;
|
||||
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
|
||||
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
|
||||
|
@ -167,7 +167,7 @@ module VX_exec_unit (
|
|||
);
|
||||
|
||||
// always @(*) begin
|
||||
// case(in_alu_op)
|
||||
// case (in_alu_op)
|
||||
// `CSR_ALU_RW: out_csr_result = in_csr_mask;
|
||||
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
|
||||
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
|
||||
|
|
|
@ -9,8 +9,7 @@ module VX_fetch (
|
|||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
|
||||
output wire ebreak,
|
||||
output wire busy,
|
||||
VX_jal_rsp_if jal_rsp_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
|
@ -45,7 +44,7 @@ module VX_fetch (
|
|||
.ctm_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// WHALT
|
||||
.whalt (warp_ctl_if.ebreak),
|
||||
.whalt (warp_ctl_if.whalt),
|
||||
.whalt_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// Wstall
|
||||
|
@ -83,7 +82,7 @@ module VX_fetch (
|
|||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.ebreak (ebreak),
|
||||
.busy (busy),
|
||||
.scheduled_warp (scheduled_warp)
|
||||
);
|
||||
|
||||
|
|
|
@ -15,8 +15,7 @@ module VX_front_end (
|
|||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
|
||||
output wire fetch_ebreak
|
||||
output wire busy
|
||||
);
|
||||
|
||||
VX_inst_meta_if fe_inst_meta_fi();
|
||||
|
@ -29,18 +28,13 @@ module VX_front_end (
|
|||
wire total_freeze = schedule_delay;
|
||||
wire icache_stage_delay;
|
||||
|
||||
wire vortex_ebreak;
|
||||
wire terminate_sim;
|
||||
|
||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim;
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
|
||||
VX_fetch fetch(
|
||||
VX_fetch fetch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
@ -52,7 +46,7 @@ module VX_front_end (
|
|||
.warp_ctl_if (warp_ctl_if),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.ebreak (vortex_ebreak), // fetch_ebreak
|
||||
.busy (busy),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi)
|
||||
);
|
||||
|
||||
|
@ -91,9 +85,8 @@ module VX_front_end (
|
|||
.fd_inst_meta_de (fd_inst_meta_de),
|
||||
.frE_to_bckE_req_if (frE_to_bckE_req_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.terminate_sim (terminate_sim)
|
||||
);
|
||||
.join_if (join_if)
|
||||
);
|
||||
|
||||
wire no_br_stall = 0;
|
||||
|
||||
|
|
|
@ -29,13 +29,13 @@ module VX_gpr (
|
|||
);
|
||||
`else
|
||||
assign write_enable = valid_write_request && ((writeback_if.wb != 0));
|
||||
wire going_to_write = write_enable & (|writeback_if.wb_valid);
|
||||
wire going_to_write = write_enable & (| writeback_if.wb_valid);
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & writeback_if.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin
|
||||
wire local_write = write_enable & writeback_if.wb_valid[i];
|
||||
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
|
||||
end
|
||||
|
||||
// wire cenb = !going_to_write;
|
||||
|
@ -50,14 +50,11 @@ module VX_gpr (
|
|||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
|
||||
|
||||
`ifndef SYN
|
||||
genvar thread;
|
||||
genvar curr_bit;
|
||||
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
|
||||
begin
|
||||
for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1)
|
||||
begin
|
||||
assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
|
||||
assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
|
||||
genvar j;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
|
||||
for (j = 0; j < `NUM_GPRS; j=j+1) begin
|
||||
assign a_reg_data[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
|
||||
assign b_reg_data[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
|
||||
end
|
||||
end
|
||||
`else
|
||||
|
@ -67,8 +64,7 @@ module VX_gpr (
|
|||
|
||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
|
||||
|
||||
genvar curr_base_thread;
|
||||
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
|
||||
for (i = 0; i < 'NT; i=i+4)
|
||||
begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rf2_32x128_wm1 first_ram (
|
||||
|
@ -77,17 +73,17 @@ module VX_gpr (
|
|||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.QA(temp_a[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AA(gpr_read_if.rs1[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(writeback_if.rd[(i+3):(i)]),
|
||||
.DB(to_write[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
|
@ -116,17 +112,17 @@ module VX_gpr (
|
|||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.QA(temp_b[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AA(gpr_read_if.rs2[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(writeback_if.rd[(i+3):(i)]),
|
||||
.DB(to_write[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
|
|
|
@ -20,13 +20,13 @@ module VX_gpr_ram (
|
|||
//--
|
||||
end else begin
|
||||
if (we) begin
|
||||
integer t;
|
||||
for (t = 0; t < `NUM_THREADS; t = t + 1) begin
|
||||
if (be[t]) begin
|
||||
ram[waddr][t][0] <= wdata[t][7:0];
|
||||
ram[waddr][t][1] <= wdata[t][15:8];
|
||||
ram[waddr][t][2] <= wdata[t][23:16];
|
||||
ram[waddr][t][3] <= wdata[t][31:24];
|
||||
integer i;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
|
||||
if (be[i]) begin
|
||||
ram[waddr][i][0] <= wdata[i][7:0];
|
||||
ram[waddr][i][1] <= wdata[i][15:8];
|
||||
ram[waddr][i][2] <= wdata[i][23:16];
|
||||
ram[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -59,9 +59,6 @@ module VX_gpr_stage (
|
|||
.b_reg_data (gpr_datf_if.b_reg_data)
|
||||
);
|
||||
|
||||
// assign bckE_req_if.is_csr = is_csr;
|
||||
// assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if exec_unit_req_temp_if();
|
||||
VX_lsu_req_if lsu_req_temp_if();
|
||||
|
@ -77,7 +74,7 @@ module VX_gpr_stage (
|
|||
.csr_req_if (csr_req_temp_if)
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
wire is_lsu = (|lsu_req_temp_if.valid);
|
||||
wire is_lsu = (| lsu_req_temp_if.valid);
|
||||
`DEBUG_END
|
||||
wire stall_rest = 0;
|
||||
wire flush_rest = schedule_delay;
|
||||
|
@ -88,7 +85,7 @@ module VX_gpr_stage (
|
|||
wire stall_exec = exec_delay;
|
||||
wire flush_exec = schedule_delay && !stall_exec;
|
||||
|
||||
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid);
|
||||
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (| bckE_req_if.valid);
|
||||
|
||||
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
|
||||
|
||||
|
@ -149,8 +146,8 @@ module VX_gpr_stage (
|
|||
.reset (reset),
|
||||
.stall (stall_exec),
|
||||
.flush (flush_exec),
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
assign exec_unit_req_if.a_reg_data = real_base_address;
|
||||
|
@ -202,8 +199,8 @@ module VX_gpr_stage (
|
|||
.reset (reset),
|
||||
.stall (stall_exec),
|
||||
.flush (flush_exec),
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
|
|
@ -15,10 +15,10 @@ module VX_gpr_wrapper (
|
|||
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] jal_data;
|
||||
genvar index;
|
||||
genvar i;
|
||||
generate
|
||||
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
|
||||
assign jal_data[index] = gpr_jal_if.curr_PC;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign
|
||||
assign jal_data[i] = gpr_jal_if.curr_PC;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -46,22 +46,19 @@ module VX_gpr_wrapper (
|
|||
|
||||
`endif
|
||||
|
||||
genvar warp_index;
|
||||
generate
|
||||
|
||||
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
|
||||
wire valid_write_request = warp_index == writeback_if.warp_num;
|
||||
generate
|
||||
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs
|
||||
wire valid_write_request = i == writeback_if.warp_num;
|
||||
VX_gpr gpr(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_write_request (valid_write_request),
|
||||
.gpr_read_if (gpr_read_if),
|
||||
.writeback_if (writeback_if),
|
||||
.a_reg_data (temp_a_reg_data[warp_index]),
|
||||
.b_reg_data (temp_b_reg_data[warp_index])
|
||||
.a_reg_data (temp_a_reg_data[i]),
|
||||
.b_reg_data (temp_b_reg_data[i])
|
||||
);
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -13,31 +13,29 @@ module VX_gpu_inst (
|
|||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
|
||||
|
||||
genvar curr_t;
|
||||
genvar i;
|
||||
generate
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire valid_inst = (|curr_valids);
|
||||
wire valid_inst = (| curr_valids);
|
||||
|
||||
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
|
||||
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
|
||||
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
|
||||
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
||||
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
||||
|
||||
wire wspawn = gpu_inst_req_if.is_wspawn;
|
||||
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
|
||||
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
genvar curr_w;
|
||||
generate
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
|
||||
for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -57,14 +55,11 @@ module VX_gpu_inst (
|
|||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
|
||||
// VX_gpu_inst_req.pc
|
||||
genvar curr_s_t;
|
||||
generate
|
||||
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
|
||||
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
|
||||
|
||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
||||
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init
|
||||
wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
|
||||
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
|
||||
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ module VX_icache_stage (
|
|||
|
||||
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (|fe_inst_meta_fi.valid);
|
||||
wire valid_inst = (| fe_inst_meta_fi.valid);
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
|
||||
|
@ -45,11 +45,12 @@ module VX_icache_stage (
|
|||
// Core can't accept response
|
||||
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
|
||||
|
||||
integer w;
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (w = 0; w < `NUM_WARPS; w = w + 1) begin
|
||||
threads_active[w] <= 0;
|
||||
for (i = 0; i < `NUM_WARPS; i = i + 1) begin
|
||||
threads_active[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (valid_inst && !icache_stage_delay) begin
|
||||
|
|
|
@ -21,12 +21,12 @@ module VX_inst_multiplex (
|
|||
wire is_csr = bckE_req_if.is_csr;
|
||||
// wire is_gpu = 0;
|
||||
|
||||
genvar currT;
|
||||
genvar i;
|
||||
generate
|
||||
for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
|
||||
assign is_mem_mask[currT] = is_mem;
|
||||
assign is_gpu_mask[currT] = is_gpu;
|
||||
assign is_csr_mask[currT] = is_csr;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init
|
||||
assign is_mem_mask[i] = is_mem;
|
||||
assign is_gpu_mask[i] = is_gpu;
|
||||
assign is_csr_mask[i] = is_csr;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -64,7 +64,7 @@ module VX_inst_multiplex (
|
|||
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
|
||||
assign exec_unit_req_if.jal = bckE_req_if.jal;
|
||||
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
|
||||
assign exec_unit_req_if.ebreak = bckE_req_if.ebreak;
|
||||
assign exec_unit_req_if.is_etype = bckE_req_if.is_etype;
|
||||
|
||||
|
||||
// GPR Req
|
||||
|
|
|
@ -61,10 +61,10 @@ module VX_lsu_unit (
|
|||
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
|
||||
|
||||
/*always_comb begin
|
||||
if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin
|
||||
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
|
||||
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
||||
end
|
||||
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin
|
||||
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
|
||||
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
||||
end
|
||||
end*/
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_scheduler (
|
|||
|
||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.valid) && (writeback_if.rd != 0);
|
||||
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
|
||||
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
||||
|
||||
wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0;
|
||||
|
@ -42,7 +42,7 @@ module VX_scheduler (
|
|||
|
||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||
|
||||
assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid))
|
||||
assign schedule_delay = ((rename_valid) && (| bckE_req_if.valid))
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec);
|
||||
|
|
|
@ -25,12 +25,12 @@ module VX_warp (
|
|||
reg [`NUM_THREADS-1:0] valid_t;
|
||||
reg [`NUM_THREADS-1:0] valid_zero;
|
||||
|
||||
integer ti;
|
||||
integer i;
|
||||
initial begin
|
||||
real_PC = 0;
|
||||
for (ti = 1; ti < `NUM_THREADS; ti=ti+1) begin
|
||||
valid_t[ti] = 0; // Thread 1 active
|
||||
valid_zero[ti] = 0;
|
||||
for (i = 1; i < `NUM_THREADS; i=i+1) begin
|
||||
valid_t[i] = 0; // Thread 1 active
|
||||
valid_zero[i] = 0;
|
||||
end
|
||||
valid_t = 1;
|
||||
valid_zero[0] = 0;
|
||||
|
@ -44,10 +44,10 @@ module VX_warp (
|
|||
end
|
||||
end
|
||||
|
||||
genvar tv;
|
||||
genvar i;
|
||||
generate
|
||||
for (tv = 0; tv < `NUM_THREADS; tv = tv+1) begin : valid_assign
|
||||
assign valid[tv] = change_mask ? thread_mask[tv] : stall ? 1'b0 : valid_t[tv];
|
||||
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
|
||||
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ module VX_warp_sched (
|
|||
output wire[`NUM_THREADS-1:0] thread_mask,
|
||||
output wire[`NW_BITS-1:0] warp_num,
|
||||
output wire[31:0] warp_pc,
|
||||
output wire ebreak,
|
||||
output wire busy,
|
||||
output wire scheduled_warp,
|
||||
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
|
@ -162,14 +162,14 @@ module VX_warp_sched (
|
|||
warp_pcs[join_warp_num] <= join_pc;
|
||||
end
|
||||
thread_masks[join_warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
didnt_split <= 0;
|
||||
end else if (is_split) begin
|
||||
warp_stalled[split_warp_num] <= 0;
|
||||
warp_stalled[split_warp_num] <= 0;
|
||||
if (!dont_split) begin
|
||||
thread_masks[split_warp_num] <= split_new_mask;
|
||||
didnt_split <= 0;
|
||||
didnt_split <= 0;
|
||||
end else begin
|
||||
didnt_split <= 1;
|
||||
didnt_split <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -218,7 +218,7 @@ module VX_warp_sched (
|
|||
warp_lock[warp_num] <= 1'b1;
|
||||
// warp_lock <= {`NUM_WARPS{1'b1}};
|
||||
end
|
||||
if (|icache_stage_valids && !stall) begin
|
||||
if ((| icache_stage_valids) && !stall) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
// warp_lock <= {`NUM_WARPS{1'b0}};
|
||||
end
|
||||
|
@ -251,15 +251,6 @@ module VX_warp_sched (
|
|||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
|
||||
// integer curr_b;
|
||||
// always @(*) begin
|
||||
// total_barrier_stall = 0;
|
||||
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
|
||||
// begin
|
||||
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
|
||||
// end
|
||||
// end
|
||||
|
||||
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
||||
|
@ -267,11 +258,11 @@ module VX_warp_sched (
|
|||
|
||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||
|
||||
genvar curr_warp;
|
||||
genvar i;
|
||||
generate
|
||||
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
|
||||
wire correct_warp_s = (curr_warp == split_warp_num);
|
||||
wire correct_warp_j = (curr_warp == join_warp_num);
|
||||
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks
|
||||
wire correct_warp_s = (i == split_warp_num);
|
||||
wire correct_warp_j = (i == join_warp_num);
|
||||
|
||||
wire push = (is_split && !dont_split) && correct_warp_s;
|
||||
wire pop = is_join && correct_warp_j;
|
||||
|
@ -284,7 +275,7 @@ module VX_warp_sched (
|
|||
.reset(reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.d (d[curr_warp]),
|
||||
.d (d[i]),
|
||||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
);
|
||||
|
@ -330,6 +321,6 @@ module VX_warp_sched (
|
|||
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
|
||||
// end
|
||||
|
||||
assign ebreak = (warp_active == 0);
|
||||
assign busy = (warp_active != 0);
|
||||
|
||||
endmodule
|
|
@ -22,9 +22,9 @@ module VX_writeback (
|
|||
|
||||
VX_wb_if writeback_tmp_if();
|
||||
|
||||
wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.valid);
|
||||
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.valid);
|
||||
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
|
||||
wire exec_wb = (inst_exec_wb_if.wb != 0) && (| inst_exec_wb_if.valid);
|
||||
wire mem_wb = (mem_wb_if.wb != 0) && (| mem_wb_if.valid);
|
||||
wire csr_wb = (csr_wb_if.wb != 0) && (| csr_wb_if.valid);
|
||||
|
||||
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
|
||||
assign no_slot_csr = csr_wb && (exec_wb);
|
||||
|
@ -78,7 +78,7 @@ module VX_writeback (
|
|||
reg [31:0] last_data_wb /* verilator public */;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if ((|writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
|
||||
if ( (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
|
||||
last_data_wb <= use_wb_data[0];
|
||||
end
|
||||
end
|
||||
|
|
164
hw/rtl/Vortex.v
164
hw/rtl/Vortex.v
|
@ -56,7 +56,8 @@ module Vortex #(
|
|||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||
output wire io_rsp_ready,
|
||||
|
||||
// Debug
|
||||
// Status
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
|
@ -157,100 +158,101 @@ module Vortex #(
|
|||
assign icache_dram_rsp_if.dram_rsp_tag = I_dram_rsp_tag;
|
||||
assign I_dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Front-end to Back-end
|
||||
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
|
||||
// Front-end to Back-end
|
||||
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
|
||||
|
||||
// Back-end to Front-end
|
||||
VX_wb_if writeback_if(); // Writeback to GPRs
|
||||
VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch
|
||||
VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
|
||||
// Back-end to Front-end
|
||||
VX_wb_if writeback_if(); // Writeback to GPRs
|
||||
VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch
|
||||
VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
|
||||
|
||||
// Warp controls
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
// Warp controls
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
|
||||
// Cache snooping
|
||||
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
|
||||
// Cache snooping
|
||||
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
|
||||
|
||||
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
|
||||
assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
|
||||
assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready;
|
||||
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
|
||||
assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
|
||||
assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready;
|
||||
|
||||
VX_front_end front_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_rsp_if (icache_core_rsp_if),
|
||||
.icache_req_if (icache_core_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.fetch_ebreak (ebreak)
|
||||
);
|
||||
VX_front_end front_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_rsp_if (icache_core_rsp_if),
|
||||
.icache_req_if (icache_core_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_scheduler scheduler (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay(gpr_stage_delay),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.writeback_if (writeback_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
VX_scheduler scheduler (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay(gpr_stage_delay),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.writeback_if (writeback_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) back_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.dcache_req_if (dcache_io_core_req_if),
|
||||
.dcache_rsp_if (dcache_io_core_rsp_if),
|
||||
.writeback_if (writeback_if),
|
||||
.mem_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
VX_back_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) back_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.dcache_req_if (dcache_io_core_req_if),
|
||||
.dcache_rsp_if (dcache_io_core_rsp_if),
|
||||
.writeback_if (writeback_if),
|
||||
.mem_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
|
||||
VX_dmem_ctrl dmem_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
VX_dmem_ctrl dmem_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core <-> Dcache
|
||||
.dcache_core_req_if (dcache_core_req_if),
|
||||
.dcache_core_rsp_if (dcache_core_rsp_if),
|
||||
// Core <-> Dcache
|
||||
.dcache_core_req_if (dcache_core_req_if),
|
||||
.dcache_core_rsp_if (dcache_core_rsp_if),
|
||||
|
||||
// Dram <-> Dcache
|
||||
.dcache_dram_req_if (dcache_dram_req_if),
|
||||
.dcache_dram_rsp_if (dcache_dram_rsp_if),
|
||||
.dcache_snp_req_if (dcache_snp_req_if),
|
||||
// Dram <-> Dcache
|
||||
.dcache_dram_req_if (dcache_dram_req_if),
|
||||
.dcache_dram_rsp_if (dcache_dram_rsp_if),
|
||||
.dcache_snp_req_if (dcache_snp_req_if),
|
||||
|
||||
// Core <-> Icache
|
||||
.icache_core_req_if (icache_core_req_if),
|
||||
.icache_core_rsp_if (icache_core_rsp_if),
|
||||
// Core <-> Icache
|
||||
.icache_core_req_if (icache_core_req_if),
|
||||
.icache_core_rsp_if (icache_core_rsp_if),
|
||||
|
||||
// Dram <-> Icache
|
||||
.icache_dram_req_if (icache_dram_req_if),
|
||||
.icache_dram_rsp_if (icache_dram_rsp_if)
|
||||
);
|
||||
// Dram <-> Icache
|
||||
.icache_dram_req_if (icache_dram_req_if),
|
||||
.icache_dram_rsp_if (icache_dram_rsp_if)
|
||||
);
|
||||
|
||||
VX_dcache_io_arb dcache_io_arb (
|
||||
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
|
||||
.core_req_if (dcache_io_core_req_if),
|
||||
.dcache_core_req_if (dcache_core_req_if),
|
||||
.io_core_req_if (io_core_req_if),
|
||||
.dcache_core_rsp_if (dcache_core_rsp_if),
|
||||
.io_core_rsp_if (io_core_rsp_if),
|
||||
.core_rsp_if (dcache_io_core_rsp_if)
|
||||
);
|
||||
VX_dcache_io_arb dcache_io_arb (
|
||||
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
|
||||
.core_req_if (dcache_io_core_req_if),
|
||||
.dcache_core_req_if (dcache_core_req_if),
|
||||
.io_core_req_if (io_core_req_if),
|
||||
.dcache_core_rsp_if (dcache_core_rsp_if),
|
||||
.io_core_rsp_if (io_core_rsp_if),
|
||||
.core_rsp_if (dcache_io_core_rsp_if)
|
||||
);
|
||||
|
||||
endmodule // Vortex
|
||||
|
||||
|
|
|
@ -42,7 +42,8 @@ module Vortex_Cluster #(
|
|||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||
output wire io_rsp_ready,
|
||||
|
||||
// Debug
|
||||
// Status
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
|
||||
|
@ -83,6 +84,7 @@ module Vortex_Cluster #(
|
|||
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire[`NUM_CORES-1:0] per_core_busy;
|
||||
wire[`NUM_CORES-1:0] per_core_ebreak;
|
||||
|
||||
genvar i;
|
||||
|
@ -92,48 +94,49 @@ module Vortex_Cluster #(
|
|||
) vortex_core (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.D_dram_req_read (per_core_D_dram_req_read [i]),
|
||||
.D_dram_req_write (per_core_D_dram_req_write [i]),
|
||||
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
|
||||
.D_dram_req_data (per_core_D_dram_req_data [i]),
|
||||
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
|
||||
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
|
||||
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
|
||||
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
|
||||
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
|
||||
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [i]),
|
||||
.D_dram_req_read (per_core_D_dram_req_read [i]),
|
||||
.D_dram_req_write (per_core_D_dram_req_write [i]),
|
||||
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
|
||||
.D_dram_req_data (per_core_D_dram_req_data [i]),
|
||||
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
|
||||
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
|
||||
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
|
||||
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
|
||||
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
|
||||
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [i]),
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
.I_dram_req_write (),
|
||||
`IGNORE_WARNINGS_END
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
|
||||
.I_dram_req_data (per_core_I_dram_req_data [i]),
|
||||
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
|
||||
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
|
||||
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
|
||||
.I_dram_req_data (per_core_I_dram_req_data [i]),
|
||||
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
|
||||
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
|
||||
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
|
||||
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
|
||||
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
|
||||
|
||||
.io_req_read (per_core_io_req_read [i]),
|
||||
.io_req_write (per_core_io_req_write [i]),
|
||||
.io_req_addr (per_core_io_req_addr [i]),
|
||||
.io_req_data (per_core_io_req_data [i]),
|
||||
.io_req_byteen (per_core_io_req_byteen [i]),
|
||||
.io_req_tag (per_core_io_req_tag [i]),
|
||||
.io_req_read (per_core_io_req_read [i]),
|
||||
.io_req_write (per_core_io_req_write [i]),
|
||||
.io_req_addr (per_core_io_req_addr [i]),
|
||||
.io_req_data (per_core_io_req_data [i]),
|
||||
.io_req_byteen (per_core_io_req_byteen [i]),
|
||||
.io_req_tag (per_core_io_req_tag [i]),
|
||||
.io_req_ready (io_req_ready),
|
||||
|
||||
.io_rsp_valid (io_rsp_valid),
|
||||
.io_rsp_data (io_rsp_data),
|
||||
.io_rsp_tag (io_rsp_tag),
|
||||
.io_rsp_ready (per_core_io_rsp_ready [i]),
|
||||
.io_rsp_ready (per_core_io_rsp_ready [i]),
|
||||
|
||||
.ebreak (per_core_ebreak [i])
|
||||
.busy (per_core_busy [i]),
|
||||
.ebreak (per_core_ebreak [i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -145,7 +148,8 @@ module Vortex_Cluster #(
|
|||
assign io_req_tag = per_core_io_req_tag[0];
|
||||
|
||||
assign io_rsp_ready = per_core_io_rsp_ready[0];
|
||||
|
||||
|
||||
assign busy = (| per_core_busy);
|
||||
assign ebreak = (& per_core_ebreak);
|
||||
|
||||
if (`L2_ENABLE) begin
|
||||
|
@ -184,8 +188,8 @@ module Vortex_Cluster #(
|
|||
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
|
||||
assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready;
|
||||
assign per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready;
|
||||
assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready;
|
||||
assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready;
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
|
||||
|
@ -221,7 +225,7 @@ module Vortex_Cluster #(
|
|||
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING_ENABLE(1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
|
||||
|
|
|
@ -40,7 +40,8 @@ module Vortex_Socket (
|
|||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||
output wire io_rsp_ready,
|
||||
|
||||
// Debug
|
||||
// Status
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
if (`NUM_CLUSTERS == 1) begin
|
||||
|
@ -80,6 +81,7 @@ module Vortex_Socket (
|
|||
.io_rsp_tag (io_rsp_tag),
|
||||
.io_rsp_ready (io_rsp_ready),
|
||||
|
||||
.busy (busy),
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
|
||||
|
@ -112,6 +114,7 @@ module Vortex_Socket (
|
|||
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||
|
||||
genvar i;
|
||||
|
@ -151,6 +154,7 @@ module Vortex_Socket (
|
|||
.io_rsp_tag (io_rsp_tag),
|
||||
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
|
||||
|
||||
.busy (per_cluster_busy [i]),
|
||||
.ebreak (per_cluster_ebreak [i])
|
||||
);
|
||||
end
|
||||
|
@ -164,6 +168,7 @@ module Vortex_Socket (
|
|||
|
||||
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
assign ebreak = (& per_cluster_ebreak);
|
||||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
@ -219,7 +224,7 @@ module Vortex_Socket (
|
|||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING_ENABLE(1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
|
||||
|
|
31
hw/rtl/cache/VX_bank.v
vendored
31
hw/rtl/cache/VX_bank.v
vendored
|
@ -11,7 +11,7 @@ module VX_bank #(
|
|||
parameter WORD_SIZE = 4,
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
parameter NUM_REQUESTS = 2,
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
parameter STAGE_1_CYCLES = 2,
|
||||
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
@ -46,7 +46,7 @@ module VX_bank #(
|
|||
parameter DRAM_ENABLE = 1,
|
||||
|
||||
// Enable snoop forwarding
|
||||
parameter SNOOP_FORWARDING_ENABLE = 0,
|
||||
parameter SNOOP_FORWARDING = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
@ -108,7 +108,7 @@ module VX_bank #(
|
|||
if (reset) begin
|
||||
snoop_state <= 0;
|
||||
end else begin
|
||||
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE;
|
||||
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -169,7 +169,7 @@ module VX_bank #(
|
|||
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
|
||||
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
|
||||
|
||||
assign reqq_push = core_req_ready && (|core_req_valids);
|
||||
assign reqq_push = core_req_ready && (| core_req_valids);
|
||||
|
||||
VX_cache_req_queue #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -241,16 +241,16 @@ module VX_bank #(
|
|||
wire stall_bank_pipe;
|
||||
reg is_fill_in_pipe;
|
||||
|
||||
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
|
||||
`DEBUG_BEGIN
|
||||
wire going_to_write_st1[STAGE_1_CYCLES-1:0];
|
||||
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
||||
`DEBUG_END
|
||||
|
||||
integer i;
|
||||
integer j;
|
||||
always @(*) begin
|
||||
is_fill_in_pipe = 0;
|
||||
for (i = 0; i < STAGE_1_CYCLES; i=i+1) begin
|
||||
if (is_fill_st1[i]) begin
|
||||
for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin
|
||||
if (is_fill_st1[j]) begin
|
||||
is_fill_in_pipe = 1;
|
||||
end
|
||||
end
|
||||
|
@ -327,8 +327,8 @@ module VX_bank #(
|
|||
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
||||
);
|
||||
|
||||
genvar stage;
|
||||
for (stage = 1; stage < STAGE_1_CYCLES; stage = stage + 1) begin
|
||||
genvar i;
|
||||
for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + (`BANK_LINE_WORDS*`WORD_WIDTH))
|
||||
) s0_1_cc (
|
||||
|
@ -336,8 +336,8 @@ module VX_bank #(
|
|||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
.flush(0),
|
||||
.in ({is_snp_st1[stage-1], going_to_write_st1[stage-1], valid_st1[stage-1], addr_st1[stage-1], wsel_st1[stage-1], writeword_st1[stage-1], inst_meta_st1[stage-1], is_fill_st1[stage-1], writedata_st1[stage-1]}),
|
||||
.out ({is_snp_st1[stage], going_to_write_st1[stage], valid_st1[stage], addr_st1[stage], wsel_st1[stage], writeword_st1[stage], inst_meta_st1[stage], is_fill_st1[stage], writedata_st1[stage]})
|
||||
.in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
|
||||
.out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -506,9 +506,10 @@ module VX_bank #(
|
|||
);
|
||||
|
||||
// Enqueue to CWB Queue
|
||||
// TODO: should investigate the need for "SNOOP_FORWARDING" here
|
||||
wire cwbq_push = (valid_st2 && !miss_st2)
|
||||
&& !cwbq_full
|
||||
&& !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO))
|
||||
&& !(SNOOP_FORWARDING && (miss_add_mem_write == `BYTE_EN_NO))
|
||||
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|
||||
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|
||||
|| (valid_st2 && miss_st2 && mrvq_full)
|
||||
|
@ -554,7 +555,7 @@ module VX_bank #(
|
|||
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
|
||||
|
||||
if (SNOOP_FORWARDING_ENABLE) begin
|
||||
if (SNOOP_FORWARDING) begin
|
||||
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
||||
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
|
||||
end else begin
|
||||
|
|
10
hw/rtl/cache/VX_cache.v
vendored
10
hw/rtl/cache/VX_cache.v
vendored
|
@ -47,7 +47,7 @@ module VX_cache #(
|
|||
parameter DRAM_ENABLE = 1,
|
||||
|
||||
// Enable snoop forwarding
|
||||
parameter SNOOP_FORWARDING_ENABLE = 0,
|
||||
parameter SNOOP_FORWARDING = 0,
|
||||
|
||||
// Prefetcher
|
||||
parameter PRFQ_SIZE = 64,
|
||||
|
@ -135,9 +135,9 @@ module VX_cache #(
|
|||
`DEBUG_END
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
assign core_req_ready = ~(|per_bank_reqq_full);
|
||||
assign snp_req_ready = ~(|per_bank_snp_req_full);
|
||||
assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready);
|
||||
assign core_req_ready = ~(| per_bank_reqq_full);
|
||||
assign snp_req_ready = ~(| per_bank_snp_req_full);
|
||||
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -265,7 +265,7 @@ module VX_cache #(
|
|||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||
.DRAM_ENABLE (DRAM_ENABLE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE),
|
||||
.SNOOP_FORWARDING (SNOOP_FORWARDING),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) bank (
|
||||
|
|
4
hw/rtl/cache/VX_cache_dfq_queue.v
vendored
4
hw/rtl/cache/VX_cache_dfq_queue.v
vendored
|
@ -63,8 +63,8 @@ module VX_cache_dfq_queue #(
|
|||
|
||||
wire o_empty;
|
||||
|
||||
wire use_empty = !(|use_per_bank_dram_fill_req_valid);
|
||||
wire out_empty = !(|out_per_bank_dram_fill_req_valid) || o_empty;
|
||||
wire use_empty = !(| use_per_bank_dram_fill_req_valid);
|
||||
wire out_empty = !(| out_per_bank_dram_fill_req_valid) || o_empty;
|
||||
|
||||
wire push_qual = dfqq_push && !dfqq_full;
|
||||
wire pop_qual = dfqq_pop && use_empty && !out_empty;
|
||||
|
|
2
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
2
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
|
@ -98,7 +98,7 @@ module VX_cache_dram_req_arb #(
|
|||
`DEBUG_END
|
||||
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req_valid);
|
||||
wire dfqq_push = (| per_bank_dram_fill_req_valid);
|
||||
|
||||
VX_cache_dfq_queue cache_dfq_queue(
|
||||
.clk (clk),
|
||||
|
|
8
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
8
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -90,10 +90,10 @@ module VX_cache_miss_resrv #(
|
|||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
genvar curr_e;
|
||||
genvar i;
|
||||
generate
|
||||
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
|
||||
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e] && (addr_table[curr_e] == fill_addr_st1);
|
||||
for (i = 0; i < MRVQ_SIZE; i=i+1) begin
|
||||
assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -107,7 +107,7 @@ module VX_cache_miss_resrv #(
|
|||
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
|
||||
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
|
||||
wire update_ready = (|make_ready);
|
||||
wire update_ready = (| make_ready);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
4
hw/rtl/cache/VX_cache_req_queue.v
vendored
4
hw/rtl/cache/VX_cache_req_queue.v
vendored
|
@ -97,8 +97,8 @@ module VX_cache_req_queue #(
|
|||
|
||||
wire o_empty;
|
||||
|
||||
wire use_empty = !(|use_per_valids);
|
||||
wire out_empty = !(|out_per_valids) || o_empty;
|
||||
wire use_empty = !(| use_per_valids);
|
||||
wire out_empty = !(| out_per_valids) || o_empty;
|
||||
|
||||
wire push_qual = reqq_push && !reqq_full;
|
||||
wire pop_qual = !out_empty && use_empty;
|
||||
|
|
6
hw/rtl/cache/VX_fill_invalidator.v
vendored
6
hw/rtl/cache/VX_fill_invalidator.v
vendored
|
@ -60,10 +60,10 @@ module VX_fill_invalidator #(
|
|||
reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill;
|
||||
wire matched;
|
||||
|
||||
integer fi;
|
||||
integer i;
|
||||
always @(*) begin
|
||||
for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi+=1) begin
|
||||
matched_fill[fi] = fills_active[fi] && (fills_address[fi] == fill_addr);
|
||||
for (i = 0; i < FILL_INVALIDAOR_SIZE; i+=1) begin
|
||||
matched_fill[i] = fills_active[i] && (fills_address[i] == fill_addr);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_tag_data_structure.v
vendored
2
hw/rtl/cache/VX_tag_data_structure.v
vendored
|
@ -68,7 +68,7 @@ module VX_tag_data_structure #(
|
|||
assign read_tag = tag [read_addr];
|
||||
assign read_data = data [read_addr];
|
||||
|
||||
wire going_to_write = (|write_enable);
|
||||
wire going_to_write = (| write_enable);
|
||||
|
||||
integer i;
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -33,10 +33,8 @@ interface VX_exec_unit_req_if ();
|
|||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire ebreak;
|
||||
wire is_etype;
|
||||
wire wspawn;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// CSR info
|
||||
wire is_csr;
|
||||
|
|
|
@ -21,9 +21,7 @@ interface VX_frE_to_bckE_req_if ();
|
|||
wire [2:0] branch_type;
|
||||
wire [19:0] upper_immed;
|
||||
wire [31:0] curr_PC;
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire ebreak;
|
||||
`IGNORE_WARNINGS_END
|
||||
wire is_etype;
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
|
|
|
@ -14,7 +14,7 @@ interface VX_warp_ctl_if ();
|
|||
wire [31:0] wspawn_pc;
|
||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
wire ebreak;
|
||||
wire whalt;
|
||||
|
||||
// barrier
|
||||
wire is_barrier;
|
||||
|
|
|
@ -65,16 +65,16 @@ module VX_divide #(
|
|||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar pipe_stage;
|
||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[pipe_stage+1] <= 0;
|
||||
denom_pipe[pipe_stage+1] <= 0;
|
||||
numer_pipe[i+1] <= 0;
|
||||
denom_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
|
||||
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
|
||||
numer_pipe[i+1] <= numer_pipe[i];
|
||||
denom_pipe[i+1] <= denom_pipe[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -83,16 +83,16 @@ module VX_mult #(
|
|||
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar pipe_stage;
|
||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
dataa_pipe[pipe_stage+1] <= 0;
|
||||
datab_pipe[pipe_stage+1] <= 0;
|
||||
dataa_pipe[i+1] <= 0;
|
||||
datab_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
dataa_pipe[pipe_stage+1] <= dataa_pipe[pipe_stage];
|
||||
datab_pipe[pipe_stage+1] <= datab_pipe[pipe_stage];
|
||||
dataa_pipe[i+1] <= dataa_pipe[i];
|
||||
datab_pipe[i+1] <= datab_pipe[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -19,8 +19,8 @@ module VX_d_e_reg (
|
|||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
|
||||
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
|
||||
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
|
||||
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -141,7 +141,7 @@ void Simulator::wait(uint32_t cycles) {
|
|||
}
|
||||
|
||||
bool Simulator::is_busy() {
|
||||
return (0 == vortex_->ebreak);
|
||||
return vortex_->busy;
|
||||
}
|
||||
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
|
@ -174,7 +174,8 @@ bool Simulator::run() {
|
|||
this->reset();
|
||||
|
||||
// execute program
|
||||
while (!vortex_->ebreak) {
|
||||
while (vortex_->busy
|
||||
&& !vortex_->ebreak) {
|
||||
this->step();
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue