rtl refactoring

This commit is contained in:
Blaise Tine 2020-05-05 10:46:48 -04:00
parent f142afac80
commit b7e892ee16
39 changed files with 393 additions and 425 deletions

View file

@ -90,7 +90,7 @@ module VX_alu_unit (
assign alu_stall = inst_delay_stall;
always @(*) begin
case(alu_op)
case (alu_op)
`DIV,
`DIVU,
`REM,
@ -136,7 +136,7 @@ module VX_alu_unit (
assign upper_immed = {upper_immed, {12{1'b0}}};
always @(*) begin
case(alu_op)
case (alu_op)
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
@ -177,7 +177,7 @@ module VX_alu_unit (
assign upper_immed_s = {upper_immed, {12{1'b0}}};
always @(*) begin
case(alu_op)
case (alu_op)
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];

View file

@ -20,7 +20,9 @@ module VX_back_end #(
VX_frE_to_bckE_req_if bckE_req_if,
VX_wb_if writeback_if,
VX_warp_ctl_if warp_ctl_if
VX_warp_ctl_if warp_ctl_if,
output wire ebreak
);
VX_wb_if wb_temp_if();
@ -69,6 +71,8 @@ module VX_back_end #(
.gpr_stage_delay (gpr_stage_delay)
);
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
VX_lsu_unit lsu_unit (
.clk (clk),
.reset (reset),
@ -81,14 +85,14 @@ module VX_back_end #(
);
VX_exec_unit exec_unit (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
.exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
.inst_exec_wb_if(inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpu_inst gpu_inst (
@ -119,6 +123,6 @@ module VX_back_end #(
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
);
endmodule

View file

@ -9,14 +9,14 @@ module VX_csr_wrapper (
wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NUM_THREADS-1:0][31:0] warp_ids;
genvar cur_t, cur_tw;
genvar i;
generate
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
assign thread_ids[cur_t] = cur_t;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init
assign thread_ids[i] = i;
end
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
end
endgenerate

View file

@ -37,7 +37,7 @@ module VX_dcache_io_arb (
assign core_req_if.core_req_ready = io_select ? io_core_req_if.core_req_ready : dcache_core_req_if.core_req_ready;
wire dcache_rsp_valid = (|dcache_core_rsp_if.core_rsp_valid);
wire dcache_rsp_valid = (| dcache_core_rsp_if.core_rsp_valid);
assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_valid : io_core_rsp_if.core_rsp_valid;
assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_data : io_core_rsp_if.core_rsp_data;

View file

@ -8,18 +8,15 @@ module VX_decode(
// Outputs
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
VX_wstall_if wstall_if,
VX_join_if join_if,
output wire terminate_sim
VX_join_if join_if
);
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[6:0] curr_opcode;
@ -34,7 +31,7 @@ module VX_decode(
wire is_auipc;
wire is_csr;
wire is_csr_immed;
wire is_e_inst;
wire is_etype;
wire is_gpgpu;
wire is_wspawn;
@ -47,7 +44,6 @@ module VX_decode(
wire[6:0] func7;
wire[11:0] u_12;
wire[7:0] jal_b_19_to_12;
wire jal_b_11;
wire[9:0] jal_b_10_to_1;
@ -77,11 +73,11 @@ module VX_decode(
reg[4:0] alu_op;
reg[4:0] mul_alu;
reg[19:0] temp_upper_immed;
reg temp_jal;
reg[31:0] temp_jal_offset;
reg[31:0] temp_itype_immed;
reg[2:0] temp_branch_type;
reg temp_branch_stall;
reg temp_jal;
reg[31:0] temp_jal_offset;
reg[31:0] temp_itype_immed;
reg[2:0] temp_branch_type;
reg temp_branch_stall;
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
@ -89,12 +85,12 @@ module VX_decode(
assign curr_opcode = in_instruction[6:0];
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
assign func3 = in_instruction[14:12];
assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20];
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
assign func3 = in_instruction[14:12];
assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20];
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
@ -110,8 +106,6 @@ module VX_decode(
assign is_auipc = (curr_opcode == `AUIPC_INST);
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
assign is_csr_immed = (is_csr) && (func3[2] == 1);
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
assign is_e_inst = in_instruction == 32'h00000073;
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
@ -132,10 +126,10 @@ module VX_decode(
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
assign frE_to_bckE_req_if.is_csr = is_csr;
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`NO_WB;
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL :
is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`NO_WB;
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
@ -145,7 +139,7 @@ module VX_decode(
// UPPER IMMEDIATE
always @(*) begin
case(curr_opcode)
case (curr_opcode)
`LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
`AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
default: temp_upper_immed = 20'h0;
@ -165,34 +159,34 @@ module VX_decode(
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
assign jal_sys_cond1 = func3 == 3'h0;
assign jal_sys_cond2 = u_12 < 12'h2;
assign jal_sys_cond1 = (func3 == 3'h0);
assign jal_sys_cond2 = (u_12 < 12'h2);
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
// JAL
always @(*) begin
case(curr_opcode)
case (curr_opcode)
`JAL_INST:
begin
temp_jal = 1'b1 && (|in_valid);
temp_jal = 1'b1 && (| in_valid);
temp_jal_offset = jal_1_offset;
end
`JALR_INST:
begin
temp_jal = 1'b1 && (|in_valid);
temp_jal = 1'b1 && (| in_valid);
temp_jal_offset = jal_2_offset;
end
`SYS_INST:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
temp_jal = jal_sys_jal && (|in_valid);
// $display("SYS EBREAK %h", (jal_sys_jal && (| in_valid)));
temp_jal = jal_sys_jal && (| in_valid);
temp_jal_offset = jal_sys_off;
end
default:
begin
temp_jal = 1'b0 && (|in_valid);
temp_jal = 1'b0 && (| in_valid);
temp_jal_offset = 32'hdeadbeef;
end
endcase
@ -202,12 +196,9 @@ module VX_decode(
assign frE_to_bckE_req_if.jal = temp_jal;
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
// wire is_ebreak;
// assign is_ebreak = is_e_inst;
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
assign frE_to_bckE_req_if.ebreak = ebreak;
assign terminate_sim = is_e_inst;
// ecall/ebreak
assign is_etype = (curr_opcode == `SYS_INST) && jal_sys_jal;
assign frE_to_bckE_req_if.is_etype = is_etype;
// CSR
@ -222,60 +213,55 @@ module VX_decode(
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
always @(*) begin
case(curr_opcode)
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
default: temp_itype_immed = 32'hdeadbeef;
endcase
case (curr_opcode)
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
default: temp_itype_immed = 32'hdeadbeef;
endcase
end
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
always @(*) begin
case(curr_opcode)
`B_INST:
begin
// $display("BRANCH IN DECODE");
temp_branch_stall = 1'b1 && (|in_valid);
case(func3)
3'h0: temp_branch_type = `BEQ;
3'h1: temp_branch_type = `BNE;
3'h4: temp_branch_type = `BLT;
3'h5: temp_branch_type = `BGT;
3'h6: temp_branch_type = `BLTU;
3'h7: temp_branch_type = `BGTU;
default: temp_branch_type = `NO_BRANCH;
endcase
end
`JAL_INST:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (|in_valid);
end
`JALR_INST:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (|in_valid);
end
default:
begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b0 && (|in_valid);
end
case (curr_opcode)
`B_INST: begin
// $display("BRANCH IN DECODE");
temp_branch_stall = 1'b1 && (| in_valid);
case (func3)
3'h0: temp_branch_type = `BEQ;
3'h1: temp_branch_type = `BNE;
3'h4: temp_branch_type = `BLT;
3'h5: temp_branch_type = `BGT;
3'h6: temp_branch_type = `BLTU;
3'h7: temp_branch_type = `BGTU;
default: temp_branch_type = `NO_BRANCH;
endcase
end
`JAL_INST: begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (| in_valid);
end
`JALR_INST: begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (| in_valid);
end
default: begin
temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b0 && (| in_valid);
end
endcase
end
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
assign wstall_if.warp_num = in_warp_num;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (| in_valid);
assign wstall_if.warp_num = in_warp_num;
always @(*) begin
// ALU OP
case(func3)
case (func3)
3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
3'h1: alu_op = `SLLA;
3'h2: alu_op = `SLT;
@ -290,7 +276,7 @@ module VX_decode(
always @(*) begin
// ALU OP
case(func3)
case (func3)
3'h0: mul_alu = `MUL;
3'h1: mul_alu = `MULH;
3'h2: mul_alu = `MULHSU;
@ -306,7 +292,7 @@ module VX_decode(
assign csr_type = func3[1:0];
always @(*) begin
case(csr_type)
case (csr_type)
2'h1: csr_alu = `CSR_ALU_RW;
2'h2: csr_alu = `CSR_ALU_RS;
2'h3: csr_alu = `CSR_ALU_RC;
@ -326,7 +312,7 @@ module VX_decode(
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
/*always_comb begin
if (1'($time & 1) && |fd_inst_meta_de.valid) begin
if (1'($time & 1) && (| fd_inst_meta_de.valid)) begin
$display("*** %t: decode: opcode=%h", $time, curr_opcode);
end
end*/

View file

@ -64,7 +64,7 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.SNOOP_FORWARDING (0),
.DRAM_ENABLE (0),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
@ -135,7 +135,7 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.SNOOP_FORWARDING (0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
@ -206,7 +206,7 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.SNOOP_FORWARDING (0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),

View file

@ -44,27 +44,28 @@ module VX_exec_unit (
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0] alu_stall;
genvar index_out_reg;
genvar i;
generate
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs
VX_alu_unit alu_unit (
.clk (clk),
.reset (reset),
.src_a (in_a_reg_data[index_out_reg]),
.src_b (in_b_reg_data[index_out_reg]),
.src_a (in_a_reg_data[i]),
.src_b (in_b_reg_data[i]),
.src_rs2 (in_rs2_src),
.itype_immed (in_itype_immed),
.upper_immed (in_upper_immed),
.alu_op (in_alu_op),
.curr_PC (in_curr_PC),
.alu_result (alu_result[index_out_reg]),
.alu_stall (alu_stall[index_out_reg])
.alu_result (alu_result[i]),
.alu_stall (alu_stall[i])
);
end
endgenerate
wire internal_stall;
assign internal_stall = |alu_stall;
assign internal_stall = (| alu_stall);
assign delay = no_slot_exec || internal_stall;
@ -98,11 +99,10 @@ module VX_exec_unit (
endcase // in_branch_type
end
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
for (i = 0; i < `NUM_THREADS; i=i+1) begin
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
end
endgenerate
@ -128,7 +128,7 @@ module VX_exec_unit (
assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num;
// Branch rsp
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid);
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (| exec_unit_req_if.valid);
assign branch_rsp_temp_if.branch_dir = temp_branch_dir;
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
@ -167,7 +167,7 @@ module VX_exec_unit (
);
// always @(*) begin
// case(in_alu_op)
// case (in_alu_op)
// `CSR_ALU_RW: out_csr_result = in_csr_mask;
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);

View file

@ -9,8 +9,7 @@ module VX_fetch (
input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire ebreak,
output wire busy,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
VX_inst_meta_if fe_inst_meta_fi,
@ -45,7 +44,7 @@ module VX_fetch (
.ctm_warp_num (warp_ctl_if.warp_num),
// WHALT
.whalt (warp_ctl_if.ebreak),
.whalt (warp_ctl_if.whalt),
.whalt_warp_num (warp_ctl_if.warp_num),
// Wstall
@ -83,7 +82,7 @@ module VX_fetch (
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.ebreak (ebreak),
.busy (busy),
.scheduled_warp (scheduled_warp)
);

View file

@ -15,8 +15,7 @@ module VX_front_end (
VX_branch_rsp_if branch_rsp_if,
VX_frE_to_bckE_req_if bckE_req_if,
output wire fetch_ebreak
output wire busy
);
VX_inst_meta_if fe_inst_meta_fi();
@ -29,18 +28,13 @@ module VX_front_end (
wire total_freeze = schedule_delay;
wire icache_stage_delay;
wire vortex_ebreak;
wire terminate_sim;
wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids;
wire[`NUM_THREADS-1:0] icache_stage_valids;
assign fetch_ebreak = vortex_ebreak || terminate_sim;
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_fetch fetch(
VX_fetch fetch (
.clk (clk),
.reset (reset),
.icache_stage_wid (icache_stage_wid),
@ -52,7 +46,7 @@ module VX_front_end (
.warp_ctl_if (warp_ctl_if),
.icache_stage_delay (icache_stage_delay),
.branch_rsp_if (branch_rsp_if),
.ebreak (vortex_ebreak), // fetch_ebreak
.busy (busy),
.fe_inst_meta_fi (fe_inst_meta_fi)
);
@ -91,9 +85,8 @@ module VX_front_end (
.fd_inst_meta_de (fd_inst_meta_de),
.frE_to_bckE_req_if (frE_to_bckE_req_if),
.wstall_if (wstall_if),
.join_if (join_if),
.terminate_sim (terminate_sim)
);
.join_if (join_if)
);
wire no_br_stall = 0;

View file

@ -29,13 +29,13 @@ module VX_gpr (
);
`else
assign write_enable = valid_write_request && ((writeback_if.wb != 0));
wire going_to_write = write_enable & (|writeback_if.wb_valid);
wire going_to_write = write_enable & (| writeback_if.wb_valid);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
wire local_write = write_enable & writeback_if.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
genvar i;
for (i = 0; i < `NUM_THREADS; i=i+1) begin
wire local_write = write_enable & writeback_if.wb_valid[i];
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
end
// wire cenb = !going_to_write;
@ -50,14 +50,11 @@ module VX_gpr (
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
`ifndef SYN
genvar thread;
genvar curr_bit;
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
begin
for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1)
begin
assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
genvar j;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
for (j = 0; j < `NUM_GPRS; j=j+1) begin
assign a_reg_data[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
assign b_reg_data[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
end
end
`else
@ -67,8 +64,7 @@ module VX_gpr (
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
for (i = 0; i < 'NT; i=i+4)
begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
@ -77,17 +73,17 @@ module VX_gpr (
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
.QA(temp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
.AA(gpr_read_if.rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(writeback_if.rd[(i+3):(i)]),
.DB(to_write[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
@ -116,17 +112,17 @@ module VX_gpr (
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
.QA(temp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
.AA(gpr_read_if.rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(writeback_if.rd[(i+3):(i)]),
.DB(to_write[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),

View file

@ -20,13 +20,13 @@ module VX_gpr_ram (
//--
end else begin
if (we) begin
integer t;
for (t = 0; t < `NUM_THREADS; t = t + 1) begin
if (be[t]) begin
ram[waddr][t][0] <= wdata[t][7:0];
ram[waddr][t][1] <= wdata[t][15:8];
ram[waddr][t][2] <= wdata[t][23:16];
ram[waddr][t][3] <= wdata[t][31:24];
integer i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
if (be[i]) begin
ram[waddr][i][0] <= wdata[i][7:0];
ram[waddr][i][1] <= wdata[i][15:8];
ram[waddr][i][2] <= wdata[i][23:16];
ram[waddr][i][3] <= wdata[i][31:24];
end
end
end

View file

@ -59,9 +59,6 @@ module VX_gpr_stage (
.b_reg_data (gpr_datf_if.b_reg_data)
);
// assign bckE_req_if.is_csr = is_csr;
// assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];
// Outputs
VX_exec_unit_req_if exec_unit_req_temp_if();
VX_lsu_req_if lsu_req_temp_if();
@ -77,7 +74,7 @@ module VX_gpr_stage (
.csr_req_if (csr_req_temp_if)
);
`DEBUG_BEGIN
wire is_lsu = (|lsu_req_temp_if.valid);
wire is_lsu = (| lsu_req_temp_if.valid);
`DEBUG_END
wire stall_rest = 0;
wire flush_rest = schedule_delay;
@ -88,7 +85,7 @@ module VX_gpr_stage (
wire stall_exec = exec_delay;
wire flush_exec = schedule_delay && !stall_exec;
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid);
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (| bckE_req_if.valid);
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
@ -149,8 +146,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
assign exec_unit_req_if.a_reg_data = real_base_address;
@ -202,8 +199,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
VX_generic_register #(

View file

@ -15,10 +15,10 @@ module VX_gpr_wrapper (
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar index;
genvar i;
generate
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
assign jal_data[index] = gpr_jal_if.curr_PC;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign
assign jal_data[i] = gpr_jal_if.curr_PC;
end
endgenerate
@ -46,22 +46,19 @@ module VX_gpr_wrapper (
`endif
genvar warp_index;
generate
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
wire valid_write_request = warp_index == writeback_if.warp_num;
generate
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs
wire valid_write_request = i == writeback_if.warp_num;
VX_gpr gpr(
.clk (clk),
.reset (reset),
.valid_write_request (valid_write_request),
.gpr_read_if (gpr_read_if),
.writeback_if (writeback_if),
.a_reg_data (temp_a_reg_data[warp_index]),
.b_reg_data (temp_b_reg_data[warp_index])
.a_reg_data (temp_a_reg_data[i]),
.b_reg_data (temp_b_reg_data[i])
);
end
endgenerate
endmodule

View file

@ -13,31 +13,29 @@ module VX_gpu_inst (
wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
genvar curr_t;
genvar i;
generate
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
wire valid_inst = (|curr_valids);
wire valid_inst = (| curr_valids);
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
wire wspawn = gpu_inst_req_if.is_wspawn;
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w;
generate
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
@ -57,14 +55,11 @@ module VX_gpu_inst (
wire[`NUM_THREADS-1:0] split_new_use_mask;
wire[`NUM_THREADS-1:0] split_new_later_mask;
// VX_gpu_inst_req.pc
genvar curr_s_t;
generate
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
end
endgenerate

View file

@ -16,7 +16,7 @@ module VX_icache_stage (
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
wire valid_inst = (|fe_inst_meta_fi.valid);
wire valid_inst = (| fe_inst_meta_fi.valid);
// Icache Request
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
@ -45,11 +45,12 @@ module VX_icache_stage (
// Core can't accept response
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
integer w;
integer i;
always @(posedge clk) begin
if (reset) begin
for (w = 0; w < `NUM_WARPS; w = w + 1) begin
threads_active[w] <= 0;
for (i = 0; i < `NUM_WARPS; i = i + 1) begin
threads_active[i] <= 0;
end
end else begin
if (valid_inst && !icache_stage_delay) begin

View file

@ -21,12 +21,12 @@ module VX_inst_multiplex (
wire is_csr = bckE_req_if.is_csr;
// wire is_gpu = 0;
genvar currT;
genvar i;
generate
for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
assign is_mem_mask[currT] = is_mem;
assign is_gpu_mask[currT] = is_gpu;
assign is_csr_mask[currT] = is_csr;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init
assign is_mem_mask[i] = is_mem;
assign is_gpu_mask[i] = is_gpu;
assign is_csr_mask[i] = is_csr;
end
endgenerate
@ -64,7 +64,7 @@ module VX_inst_multiplex (
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
assign exec_unit_req_if.jal = bckE_req_if.jal;
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
assign exec_unit_req_if.ebreak = bckE_req_if.ebreak;
assign exec_unit_req_if.is_etype = bckE_req_if.is_etype;
// GPR Req

View file

@ -61,10 +61,10 @@ module VX_lsu_unit (
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
/*always_comb begin
if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
end
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end*/

View file

@ -18,7 +18,7 @@ module VX_scheduler (
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.valid) && (writeback_if.rd != 0);
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0;
@ -42,7 +42,7 @@ module VX_scheduler (
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid))
assign schedule_delay = ((rename_valid) && (| bckE_req_if.valid))
|| (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec);

View file

@ -25,12 +25,12 @@ module VX_warp (
reg [`NUM_THREADS-1:0] valid_t;
reg [`NUM_THREADS-1:0] valid_zero;
integer ti;
integer i;
initial begin
real_PC = 0;
for (ti = 1; ti < `NUM_THREADS; ti=ti+1) begin
valid_t[ti] = 0; // Thread 1 active
valid_zero[ti] = 0;
for (i = 1; i < `NUM_THREADS; i=i+1) begin
valid_t[i] = 0; // Thread 1 active
valid_zero[i] = 0;
end
valid_t = 1;
valid_zero[0] = 0;
@ -44,10 +44,10 @@ module VX_warp (
end
end
genvar tv;
genvar i;
generate
for (tv = 0; tv < `NUM_THREADS; tv = tv+1) begin : valid_assign
assign valid[tv] = change_mask ? thread_mask[tv] : stall ? 1'b0 : valid_t[tv];
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
end
endgenerate

View file

@ -56,7 +56,7 @@ module VX_warp_sched (
output wire[`NUM_THREADS-1:0] thread_mask,
output wire[`NW_BITS-1:0] warp_num,
output wire[31:0] warp_pc,
output wire ebreak,
output wire busy,
output wire scheduled_warp,
input wire[`NW_BITS-1:0] icache_stage_wid,
@ -162,14 +162,14 @@ module VX_warp_sched (
warp_pcs[join_warp_num] <= join_pc;
end
thread_masks[join_warp_num] <= join_tm;
didnt_split <= 0;
didnt_split <= 0;
end else if (is_split) begin
warp_stalled[split_warp_num] <= 0;
warp_stalled[split_warp_num] <= 0;
if (!dont_split) begin
thread_masks[split_warp_num] <= split_new_mask;
didnt_split <= 0;
didnt_split <= 0;
end else begin
didnt_split <= 1;
didnt_split <= 1;
end
end
@ -218,7 +218,7 @@ module VX_warp_sched (
warp_lock[warp_num] <= 1'b1;
// warp_lock <= {`NUM_WARPS{1'b1}};
end
if (|icache_stage_valids && !stall) begin
if ((| icache_stage_valids) && !stall) begin
warp_lock[icache_stage_wid] <= 1'b0;
// warp_lock <= {`NUM_WARPS{1'b0}};
end
@ -251,15 +251,6 @@ module VX_warp_sched (
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
// integer curr_b;
// always @(*) begin
// total_barrier_stall = 0;
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
// begin
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
// end
// end
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
@ -267,11 +258,11 @@ module VX_warp_sched (
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
genvar curr_warp;
genvar i;
generate
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
wire correct_warp_s = (curr_warp == split_warp_num);
wire correct_warp_j = (curr_warp == join_warp_num);
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks
wire correct_warp_s = (i == split_warp_num);
wire correct_warp_j = (i == join_warp_num);
wire push = (is_split && !dont_split) && correct_warp_s;
wire pop = is_join && correct_warp_j;
@ -284,7 +275,7 @@ module VX_warp_sched (
.reset(reset),
.push (push),
.pop (pop),
.d (d[curr_warp]),
.d (d[i]),
.q1 (q1),
.q2 (q2)
);
@ -330,6 +321,6 @@ module VX_warp_sched (
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
// end
assign ebreak = (warp_active == 0);
assign busy = (warp_active != 0);
endmodule

View file

@ -22,9 +22,9 @@ module VX_writeback (
VX_wb_if writeback_tmp_if();
wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.valid);
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.valid);
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
wire exec_wb = (inst_exec_wb_if.wb != 0) && (| inst_exec_wb_if.valid);
wire mem_wb = (mem_wb_if.wb != 0) && (| mem_wb_if.valid);
wire csr_wb = (csr_wb_if.wb != 0) && (| csr_wb_if.valid);
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && (exec_wb);
@ -78,7 +78,7 @@ module VX_writeback (
reg [31:0] last_data_wb /* verilator public */;
always @(posedge clk) begin
if ((|writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
if ( (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
last_data_wb <= use_wb_data[0];
end
end

View file

@ -56,7 +56,8 @@ module Vortex #(
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
// Status
output wire busy,
output wire ebreak
);
`DEBUG_BEGIN
@ -157,100 +158,101 @@ module Vortex #(
assign icache_dram_rsp_if.dram_rsp_tag = I_dram_rsp_tag;
assign I_dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Front-end to Back-end
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
// Front-end to Back-end
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
// Back-end to Front-end
VX_wb_if writeback_if(); // Writeback to GPRs
VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch
VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
// Back-end to Front-end
VX_wb_if writeback_if(); // Writeback to GPRs
VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch
VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
// Warp controls
VX_warp_ctl_if warp_ctl_if();
// Warp controls
VX_warp_ctl_if warp_ctl_if();
// Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
// Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready;
VX_front_end front_end (
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.schedule_delay (schedule_delay),
.icache_rsp_if (icache_core_rsp_if),
.icache_req_if (icache_core_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.fetch_ebreak (ebreak)
);
VX_front_end front_end (
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.schedule_delay (schedule_delay),
.icache_rsp_if (icache_core_rsp_if),
.icache_req_if (icache_core_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.busy (busy)
);
VX_scheduler scheduler (
.clk (clk),
.reset (reset),
.memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay(gpr_stage_delay),
.bckE_req_if (bckE_req_if),
.writeback_if (writeback_if),
.schedule_delay (schedule_delay),
.is_empty (scheduler_empty)
);
VX_scheduler scheduler (
.clk (clk),
.reset (reset),
.memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay(gpr_stage_delay),
.bckE_req_if (bckE_req_if),
.writeback_if (writeback_if),
.schedule_delay (schedule_delay),
.is_empty (scheduler_empty)
);
VX_back_end #(
.CORE_ID(CORE_ID)
) back_end (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.dcache_req_if (dcache_io_core_req_if),
.dcache_rsp_if (dcache_io_core_rsp_if),
.writeback_if (writeback_if),
.mem_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_back_end #(
.CORE_ID(CORE_ID)
) back_end (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.dcache_req_if (dcache_io_core_req_if),
.dcache_rsp_if (dcache_io_core_rsp_if),
.writeback_if (writeback_if),
.mem_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay),
.ebreak (ebreak)
);
VX_dmem_ctrl dmem_ctrl (
.clk (clk),
.reset (reset),
VX_dmem_ctrl dmem_ctrl (
.clk (clk),
.reset (reset),
// Core <-> Dcache
.dcache_core_req_if (dcache_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
// Core <-> Dcache
.dcache_core_req_if (dcache_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
// Dram <-> Dcache
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if),
// Dram <-> Dcache
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if),
// Core <-> Icache
.icache_core_req_if (icache_core_req_if),
.icache_core_rsp_if (icache_core_rsp_if),
// Core <-> Icache
.icache_core_req_if (icache_core_req_if),
.icache_core_rsp_if (icache_core_rsp_if),
// Dram <-> Icache
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if)
);
// Dram <-> Icache
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if)
);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
.core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
.io_core_rsp_if (io_core_rsp_if),
.core_rsp_if (dcache_io_core_rsp_if)
);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
.core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
.io_core_rsp_if (io_core_rsp_if),
.core_rsp_if (dcache_io_core_rsp_if)
);
endmodule // Vortex

View file

@ -42,7 +42,8 @@ module Vortex_Cluster #(
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
// Status
output wire busy,
output wire ebreak
);
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
@ -83,6 +84,7 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
`IGNORE_WARNINGS_END
wire[`NUM_CORES-1:0] per_core_busy;
wire[`NUM_CORES-1:0] per_core_ebreak;
genvar i;
@ -92,48 +94,49 @@ module Vortex_Cluster #(
) vortex_core (
.clk (clk),
.reset (reset),
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (per_core_D_dram_req_data [i]),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (per_core_D_dram_req_data [i]),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
`IGNORE_WARNINGS_BEGIN
.I_dram_req_write (),
`IGNORE_WARNINGS_END
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (per_core_I_dram_req_data [i]),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (per_core_I_dram_req_data [i]),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.ebreak (per_core_ebreak [i])
.busy (per_core_busy [i]),
.ebreak (per_core_ebreak [i])
);
end
@ -145,7 +148,8 @@ module Vortex_Cluster #(
assign io_req_tag = per_core_io_req_tag[0];
assign io_rsp_ready = per_core_io_rsp_ready[0];
assign busy = (| per_core_busy);
assign ebreak = (& per_core_ebreak);
if (`L2_ENABLE) begin
@ -184,8 +188,8 @@ module Vortex_Cluster #(
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready;
assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
@ -221,7 +225,7 @@ module Vortex_Cluster #(
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)

View file

@ -40,7 +40,8 @@ module Vortex_Socket (
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
// Status
output wire busy,
output wire ebreak
);
if (`NUM_CLUSTERS == 1) begin
@ -80,6 +81,7 @@ module Vortex_Socket (
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (io_rsp_ready),
.busy (busy),
.ebreak (ebreak)
);
@ -112,6 +114,7 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
`IGNORE_WARNINGS_END
wire[`NUM_CLUSTERS-1:0] per_cluster_busy;
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
genvar i;
@ -151,6 +154,7 @@ module Vortex_Socket (
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.busy (per_cluster_busy [i]),
.ebreak (per_cluster_ebreak [i])
);
end
@ -164,6 +168,7 @@ module Vortex_Socket (
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
assign busy = (| per_cluster_busy);
assign ebreak = (& per_cluster_ebreak);
// L3 Cache ///////////////////////////////////////////////////////////
@ -219,7 +224,7 @@ module Vortex_Socket (
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)

View file

@ -11,7 +11,7 @@ module VX_bank #(
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
@ -46,7 +46,7 @@ module VX_bank #(
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
parameter SNOOP_FORWARDING = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
@ -108,7 +108,7 @@ module VX_bank #(
if (reset) begin
snoop_state <= 0;
end else begin
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE;
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING;
end
end
@ -169,7 +169,7 @@ module VX_bank #(
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (|core_req_valids);
assign reqq_push = core_req_ready && (| core_req_valids);
VX_cache_req_queue #(
.CACHE_SIZE (CACHE_SIZE),
@ -241,16 +241,16 @@ module VX_bank #(
wire stall_bank_pipe;
reg is_fill_in_pipe;
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
wire is_fill_st1 [STAGE_1_CYCLES-1:0];
`DEBUG_BEGIN
wire going_to_write_st1[STAGE_1_CYCLES-1:0];
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
`DEBUG_END
integer i;
integer j;
always @(*) begin
is_fill_in_pipe = 0;
for (i = 0; i < STAGE_1_CYCLES; i=i+1) begin
if (is_fill_st1[i]) begin
for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin
if (is_fill_st1[j]) begin
is_fill_in_pipe = 1;
end
end
@ -327,8 +327,8 @@ module VX_bank #(
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
);
genvar stage;
for (stage = 1; stage < STAGE_1_CYCLES; stage = stage + 1) begin
genvar i;
for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin
VX_generic_register #(
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + (`BANK_LINE_WORDS*`WORD_WIDTH))
) s0_1_cc (
@ -336,8 +336,8 @@ module VX_bank #(
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({is_snp_st1[stage-1], going_to_write_st1[stage-1], valid_st1[stage-1], addr_st1[stage-1], wsel_st1[stage-1], writeword_st1[stage-1], inst_meta_st1[stage-1], is_fill_st1[stage-1], writedata_st1[stage-1]}),
.out ({is_snp_st1[stage], going_to_write_st1[stage], valid_st1[stage], addr_st1[stage], wsel_st1[stage], writeword_st1[stage], inst_meta_st1[stage], is_fill_st1[stage], writedata_st1[stage]})
.in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
);
end
@ -506,9 +506,10 @@ module VX_bank #(
);
// Enqueue to CWB Queue
// TODO: should investigate the need for "SNOOP_FORWARDING" here
wire cwbq_push = (valid_st2 && !miss_st2)
&& !cwbq_full
&& !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO))
&& !(SNOOP_FORWARDING && (miss_add_mem_write == `BYTE_EN_NO))
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
@ -554,7 +555,7 @@ module VX_bank #(
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
if (SNOOP_FORWARDING_ENABLE) begin
if (SNOOP_FORWARDING) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end else begin

View file

@ -47,7 +47,7 @@ module VX_cache #(
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
parameter SNOOP_FORWARDING = 0,
// Prefetcher
parameter PRFQ_SIZE = 64,
@ -135,9 +135,9 @@ module VX_cache #(
`DEBUG_END
assign dram_req_tag = dram_req_addr;
assign core_req_ready = ~(|per_bank_reqq_full);
assign snp_req_ready = ~(|per_bank_snp_req_full);
assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready);
assign core_req_ready = ~(| per_bank_reqq_full);
assign snp_req_ready = ~(| per_bank_snp_req_full);
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
VX_cache_core_req_bank_sel #(
.CACHE_SIZE (CACHE_SIZE),
@ -265,7 +265,7 @@ module VX_cache #(
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) bank (

View file

@ -63,8 +63,8 @@ module VX_cache_dfq_queue #(
wire o_empty;
wire use_empty = !(|use_per_bank_dram_fill_req_valid);
wire out_empty = !(|out_per_bank_dram_fill_req_valid) || o_empty;
wire use_empty = !(| use_per_bank_dram_fill_req_valid);
wire out_empty = !(| out_per_bank_dram_fill_req_valid) || o_empty;
wire push_qual = dfqq_push && !dfqq_full;
wire pop_qual = dfqq_pop && use_empty && !out_empty;

View file

@ -98,7 +98,7 @@ module VX_cache_dram_req_arb #(
`DEBUG_END
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
wire dfqq_push = (|per_bank_dram_fill_req_valid);
wire dfqq_push = (| per_bank_dram_fill_req_valid);
VX_cache_dfq_queue cache_dfq_queue(
.clk (clk),

View file

@ -90,10 +90,10 @@ module VX_cache_miss_resrv #(
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready;
genvar curr_e;
genvar i;
generate
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e] && (addr_table[curr_e] == fill_addr_st1);
for (i = 0; i < MRVQ_SIZE; i=i+1) begin
assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1);
end
endgenerate
@ -107,7 +107,7 @@ module VX_cache_miss_resrv #(
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
wire update_ready = (|make_ready);
wire update_ready = (| make_ready);
always @(posedge clk) begin
if (reset) begin

View file

@ -97,8 +97,8 @@ module VX_cache_req_queue #(
wire o_empty;
wire use_empty = !(|use_per_valids);
wire out_empty = !(|out_per_valids) || o_empty;
wire use_empty = !(| use_per_valids);
wire out_empty = !(| out_per_valids) || o_empty;
wire push_qual = reqq_push && !reqq_full;
wire pop_qual = !out_empty && use_empty;

View file

@ -60,10 +60,10 @@ module VX_fill_invalidator #(
reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill;
wire matched;
integer fi;
integer i;
always @(*) begin
for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi+=1) begin
matched_fill[fi] = fills_active[fi] && (fills_address[fi] == fill_addr);
for (i = 0; i < FILL_INVALIDAOR_SIZE; i+=1) begin
matched_fill[i] = fills_active[i] && (fills_address[i] == fill_addr);
end
end

View file

@ -68,7 +68,7 @@ module VX_tag_data_structure #(
assign read_tag = tag [read_addr];
assign read_data = data [read_addr];
wire going_to_write = (|write_enable);
wire going_to_write = (| write_enable);
integer i;
always @(posedge clk) begin

View file

@ -33,10 +33,8 @@ interface VX_exec_unit_req_if ();
wire jal;
wire [31:0] jal_offset;
`IGNORE_WARNINGS_BEGIN
wire ebreak;
wire is_etype;
wire wspawn;
`IGNORE_WARNINGS_END
// CSR info
wire is_csr;

View file

@ -21,9 +21,7 @@ interface VX_frE_to_bckE_req_if ();
wire [2:0] branch_type;
wire [19:0] upper_immed;
wire [31:0] curr_PC;
`IGNORE_WARNINGS_BEGIN
wire ebreak;
`IGNORE_WARNINGS_END
wire is_etype;
wire jalQual;
wire jal;
wire [31:0] jal_offset;

View file

@ -14,7 +14,7 @@ interface VX_warp_ctl_if ();
wire [31:0] wspawn_pc;
wire [`NUM_WARPS-1:0] wspawn_new_active;
wire ebreak;
wire whalt;
// barrier
wire is_barrier;

View file

@ -65,16 +65,16 @@ module VX_divide #(
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
genvar pipe_stage;
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
genvar i;
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[pipe_stage+1] <= 0;
denom_pipe[pipe_stage+1] <= 0;
numer_pipe[i+1] <= 0;
denom_pipe[i+1] <= 0;
end
else if (clken) begin
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
numer_pipe[i+1] <= numer_pipe[i];
denom_pipe[i+1] <= denom_pipe[i];
end
end
end

View file

@ -83,16 +83,16 @@ module VX_mult #(
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
genvar pipe_stage;
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
genvar i;
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
dataa_pipe[pipe_stage+1] <= 0;
datab_pipe[pipe_stage+1] <= 0;
dataa_pipe[i+1] <= 0;
datab_pipe[i+1] <= 0;
end
else if (clken) begin
dataa_pipe[pipe_stage+1] <= dataa_pipe[pipe_stage];
datab_pipe[pipe_stage+1] <= datab_pipe[pipe_stage];
dataa_pipe[i+1] <= dataa_pipe[i];
datab_pipe[i+1] <= datab_pipe[i];
end
end
end

View file

@ -19,8 +19,8 @@ module VX_d_e_reg (
.reset (reset),
.stall (stall),
.flush (flush),
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
);
endmodule

View file

@ -141,7 +141,7 @@ void Simulator::wait(uint32_t cycles) {
}
bool Simulator::is_busy() {
return (0 == vortex_->ebreak);
return vortex_->busy;
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
@ -174,7 +174,8 @@ bool Simulator::run() {
this->reset();
// execute program
while (!vortex_->ebreak) {
while (vortex_->busy
&& !vortex_->ebreak) {
this->step();
}