mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
pipeline refactoring
This commit is contained in:
parent
9cf8bf6149
commit
25f66e6490
71 changed files with 2242 additions and 2379 deletions
|
@ -116,9 +116,9 @@ static const scope_signal_t scope_signals[] = {
|
|||
{ NUM_THREADS, "writeback_valid" },
|
||||
|
||||
{ 1, "schedule_delay" },
|
||||
{ 1, "memory_delay" },
|
||||
{ 1, "mem_delay" },
|
||||
{ 1, "exec_delay" },
|
||||
{ 1, "gpr_stage_delay" },
|
||||
{ 1, "gpr_delay" },
|
||||
{ 1, "busy" },
|
||||
|
||||
{ 1, "bank_valid_st0" },
|
||||
|
|
|
@ -13,14 +13,15 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#DEBUG=1
|
||||
DEBUG=1
|
||||
#AFU=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
|
|
@ -1,132 +1,66 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [31:0] src_a,
|
||||
input wire [31:0] src_b,
|
||||
input wire src_rs2,
|
||||
input wire [31:0] itype_immed,
|
||||
input wire [19:0] upper_immed,
|
||||
input wire [4:0] alu_op,
|
||||
input wire [31:0] curr_PC,
|
||||
output reg [31:0] alu_result,
|
||||
output reg alu_stall
|
||||
);
|
||||
wire[31:0] div_result_unsigned;
|
||||
wire[31:0] div_result_signed;
|
||||
|
||||
wire[31:0] rem_result_unsigned;
|
||||
wire[31:0] rem_result_signed;
|
||||
|
||||
wire[63:0] mul_result;
|
||||
|
||||
wire[31:0] alu_in1 = src_a;
|
||||
wire[31:0] alu_in2 = (src_rs2 == `RS2_IMMED) ? itype_immed : src_b;
|
||||
|
||||
wire[31:0] upper_immed_s = {upper_immed, {12{1'b0}}};
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
reg [7:0] inst_delay;
|
||||
reg [7:0] curr_inst_delay;
|
||||
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_DIV,
|
||||
`ALU_DIVU,
|
||||
`ALU_REM,
|
||||
`ALU_REMU: inst_delay = `DIV_LATENCY;
|
||||
`ALU_MUL,
|
||||
`ALU_MULH,
|
||||
`ALU_MULHSU,
|
||||
`ALU_MULHU: inst_delay = `MUL_LATENCY;
|
||||
default: inst_delay = 0;
|
||||
endcase
|
||||
// Inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_wb_if alu_wb_if
|
||||
);
|
||||
// Per-thread combinational ALU datapath.
// One result lane is generated for each of the `NUM_THREADS threads; all lanes
// share the same opcode (alu_req_if.alu_op) and differ only in operand data.

// alu_result is written from the always @(*) block below, so it must be a
// variable (reg), not a net: procedural assignments to a wire are illegal.
reg  [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][32:0] shift_result;
`UNUSED_VAR (shift_result);

wire [`ALU_BITS-1:0] alu_op = alu_req_if.alu_op;
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;

genvar i;

for (i = 0; i < `NUM_THREADS; i++) begin

    // 33-bit subtraction. Operands are sign-extended for every opcode except
    // SLTU, where they are zero-extended; bit 32 of the difference is then the
    // "in1 < in2" flag for both the signed (SLT) and unsigned (SLTU) compare.
    wire [32:0] sub_in1 = {(alu_op != `ALU_SLTU) & alu_in1[i][31], alu_in1[i]};
    wire [32:0] sub_in2 = {(alu_op != `ALU_SLTU) & alu_in2[i][31], alu_in2[i]};
    assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);

    // 33-bit arithmetic right shift. The extra top bit carries the sign only
    // for SRA, so the same shifter fills with zeros for SRL and with the sign
    // bit for SRA. The shift amount is the low 5 bits of operand 2.
    wire [32:0] shift_in1 = {(alu_op == `ALU_SRA) & alu_in1[i][31], alu_in1[i]};
    assign shift_result[i] = $signed(shift_in1) >>> alu_in2[i][4:0];

    // Result mux for lane i.
    always @(*) begin
        case (alu_op)
            `ALU_SUB:   alu_result[i] = sub_result[i][31:0];
            `ALU_SLL:   alu_result[i] = alu_in1[i] << alu_in2[i][4:0];
            `ALU_SLT,
            `ALU_SLTU:  alu_result[i] = 32'(sub_result[i][32]);   // 0 or 1
            `ALU_XOR:   alu_result[i] = alu_in1[i] ^ alu_in2[i];
            `ALU_SRL,
            `ALU_SRA:   alu_result[i] = shift_result[i][31:0];
            `ALU_OR:    alu_result[i] = alu_in1[i] | alu_in2[i];
            `ALU_AND:   alu_result[i] = alu_in1[i] & alu_in2[i];
            default:    alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC
        endcase
    end
end
|
||||
|
||||
wire inst_stalled = (curr_inst_delay != inst_delay);
|
||||
wire stall = ~alu_wb_if.ready && (| alu_wb_if.valid);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
curr_inst_delay <= 0;
|
||||
end else begin
|
||||
curr_inst_delay <= inst_stalled ? (curr_inst_delay + 1) : 0;
|
||||
end
|
||||
end
|
||||
// Output register between the ALU datapath and the writeback interface.
// Packed fields, in order: valid, warp_num, curr_PC (32), rd, wb, and the
// per-thread result data (`NUM_THREADS * 32). The register holds its contents
// while `stall` is asserted; flush is tied off.
// NOTE(review): N assumes alu_req_if.valid is `NUM_THREADS bits wide - confirm
// against VX_alu_req_if.
VX_generic_register #(
    // No trailing comma after the last parameter override: an empty entry in
    // the parameter assignment list is a syntax error.
    .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
) alu_reg (
    .clk   (clk),
    .reset (reset),
    .stall (stall),
    .flush (0),
    .in    ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}),
    .out   ({alu_wb_if.valid,  alu_wb_if.warp_num,  alu_wb_if.curr_PC,  alu_wb_if.rd,  alu_wb_if.wb,  alu_wb_if.data})
);
|
||||
|
||||
assign alu_stall = inst_stalled;
|
||||
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_ADD: alu_result = $signed(alu_in1) + $signed(alu_in2);
|
||||
`ALU_SUB: alu_result = $signed(alu_in1) - $signed(alu_in2);
|
||||
`ALU_SLLA: alu_result = alu_in1 << alu_in2[4:0];
|
||||
`ALU_SLT: alu_result = ($signed(alu_in1) < $signed(alu_in2)) ? 32'h1 : 32'h0;
|
||||
`ALU_SLTU: alu_result = alu_in1 < alu_in2 ? 32'h1 : 32'h0;
|
||||
`ALU_XOR: alu_result = alu_in1 ^ alu_in2;
|
||||
`ALU_SRL: alu_result = alu_in1 >> alu_in2[4:0];
|
||||
`ALU_SRA: alu_result = $signed(alu_in1) >>> alu_in2[4:0];
|
||||
`ALU_OR: alu_result = alu_in1 | alu_in2;
|
||||
`ALU_AND: alu_result = alu_in2 & alu_in1;
|
||||
`ALU_SUBU: alu_result = (alu_in1 >= alu_in2) ? 32'h0 : 32'hffffffff;
|
||||
`ALU_LUI: alu_result = upper_immed_s;
|
||||
`ALU_AUIPC: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
|
||||
`ALU_MUL: alu_result = mul_result[31:0];
|
||||
`ALU_MULH: alu_result = mul_result[63:32];
|
||||
`ALU_MULHSU: alu_result = mul_result[63:32];
|
||||
`ALU_MULHU: alu_result = mul_result[63:32];
|
||||
`ALU_DIV: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_signed;
|
||||
`ALU_DIVU: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_unsigned;
|
||||
`ALU_REM: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_signed;
|
||||
`ALU_REMU: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_unsigned;
|
||||
default: alu_result = 32'h0;
|
||||
endcase
|
||||
end
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NSIGNED(0),
|
||||
.DSIGNED(0),
|
||||
.PIPELINE(`DIV_LATENCY)
|
||||
) udiv (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.numer(alu_in1),
|
||||
.denom(alu_in2),
|
||||
.quotient(div_result_unsigned),
|
||||
.remainder(rem_result_unsigned)
|
||||
);
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NSIGNED(1),
|
||||
.DSIGNED(1),
|
||||
.PIPELINE(`DIV_LATENCY)
|
||||
) sdiv (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.numer(alu_in1),
|
||||
.denom(alu_in2),
|
||||
.quotient(div_result_signed),
|
||||
.remainder(rem_result_signed)
|
||||
);
|
||||
|
||||
wire [32:0] mul_dataa = {(alu_op == `ALU_MULHU) ? 1'b0 : alu_in1[31], alu_in1};
|
||||
wire [32:0] mul_datab = {(alu_op == `ALU_MULHU || alu_op == `ALU_MULHSU) ? 1'b0 : alu_in2[31], alu_in2};
|
||||
|
||||
VX_mult #(
|
||||
.WIDTHA(33),
|
||||
.WIDTHB(33),
|
||||
.WIDTHP(64),
|
||||
.SIGNED(1),
|
||||
.PIPELINE(`MUL_LATENCY)
|
||||
) multiplier (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.dataa(mul_dataa),
|
||||
.datab(mul_datab),
|
||||
.result(mul_result)
|
||||
);
|
||||
assign alu_req_if.ready = ~stall;
|
||||
|
||||
endmodule
|
|
@ -1,171 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_back_end #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
VX_jal_rsp_if jal_rsp_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
||||
VX_backend_req_if bckE_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
output wire mem_delay,
|
||||
output wire exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_wb_if mem_wb_if();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_if exec_unit_req_if();
|
||||
VX_wb_if inst_exec_wb_if();
|
||||
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_if gpu_inst_req_if();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_wb_if csr_wb_if();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.writeback_if (writeback_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
// New
|
||||
.exec_unit_req_if (exec_unit_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (mem_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
.delay (mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
|
||||
VX_exec_unit exec_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.exec_unit_req_if(exec_unit_req_if),
|
||||
.inst_exec_wb_if(inst_exec_wb_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.delay (exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
);
|
||||
|
||||
VX_gpu_inst gpu_inst (
|
||||
.gpu_inst_req_if(gpu_inst_req_if),
|
||||
.warp_ctl_if (warp_ctl_if)
|
||||
);
|
||||
|
||||
VX_csr_req_if issued_csr_req_if();
|
||||
|
||||
VX_wb_if csr_pipe_rsp_if();
|
||||
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.csr_pipe_stall (stall_gpr_csr),
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.issued_csr_req_if(issued_csr_req_if),
|
||||
|
||||
.csr_pipe_rsp_if (csr_pipe_rsp_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if)
|
||||
);
|
||||
|
||||
VX_csr_pipe #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_pipe (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.no_slot_csr (no_slot_csr),
|
||||
.csr_req_if (issued_csr_req_if),
|
||||
.writeback_if (writeback_if),
|
||||
.csr_wb_if (csr_pipe_rsp_if),
|
||||
.stall_gpr_csr (stall_gpr_csr)
|
||||
);
|
||||
|
||||
VX_writeback writeback (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.inst_exec_wb_if(inst_exec_wb_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
||||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_exec (no_slot_exec),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN(scope_decode_valid, bckE_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_decode_warp_num, bckE_req_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_decode_curr_PC, bckE_req_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_decode_is_jal, bckE_req_if.is_jal);
|
||||
`SCOPE_ASSIGN(scope_decode_rs1, bckE_req_if.rs1);
|
||||
`SCOPE_ASSIGN(scope_decode_rs2, bckE_req_if.rs2);
|
||||
|
||||
`SCOPE_ASSIGN(scope_execute_valid, exec_unit_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_execute_warp_num, exec_unit_req_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_execute_curr_PC, exec_unit_req_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_execute_rd, exec_unit_req_if.rd);
|
||||
`SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data);
|
||||
`SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data);
|
||||
|
||||
`SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_writeback_curr_PC, writeback_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb);
|
||||
`SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN(scope_writeback_data, writeback_if.data);
|
||||
|
||||
endmodule
|
82
hw/rtl/VX_branch_unit.v
Normal file
82
hw/rtl/VX_branch_unit.v
Normal file
|
@ -0,0 +1,82 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Branch unit.
//
// Evaluates the branch condition for an incoming branch request, computes the
// branch destination, and produces two registered outputs:
//   - branch_rsp_if : valid / warp_num / taken / dest
//   - branch_wb_if  : a writeback carrying next_PC replicated to every thread
// Both output registers share one stall signal, which is asserted while the
// writeback side holds valid data that has not been accepted.
module VX_branch_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // Inputs
    VX_branch_req_if    branch_req_if,

    // Outputs
    VX_branch_rsp_if    branch_rsp_if,
    VX_wb_if            branch_wb_if
);

    // Index of the thread whose operands are used to evaluate the branch.
    wire [`NT_BITS-1:0] br_result_index;

    // Select a thread from the request's valid mask. The mask must come from
    // this module's own request interface (branch_req_if); there is no
    // alu_req_if port here, so referencing it would not elaborate.
    // NOTE(review): presumably the encoder returns the lowest set bit -
    // confirm against VX_priority_encoder.
    VX_priority_encoder #(
        .N(`NUM_THREADS)
    ) choose_alu_result (
        .data_in  (branch_req_if.valid),
        .data_out (br_result_index),
        `UNUSED_PIN (valid_out)
    );

    wire [`BR_BITS-1:0] br_op = branch_req_if.br_op;
    wire [31:0] rs1_data = branch_req_if.rs1_data[br_result_index];
    wire [31:0] rs2_data = branch_req_if.rs2_data[br_result_index];

    // 33-bit subtraction used for all comparisons. Operands are zero-extended
    // for the unsigned compares (LTU/GEU) and sign-extended otherwise, so that
    // bit 32 of the difference is the "rs1 < rs2" flag in both cases.
    wire [32:0] sub_in1 = {(br_op != `BR_LTU) & (br_op != `BR_GEU) & rs1_data[31], rs1_data};
    wire [32:0] sub_in2 = {(br_op != `BR_LTU) & (br_op != `BR_GEU) & rs2_data[31], rs2_data};
    wire [32:0] sub_res = $signed(sub_in1) - $signed(sub_in2);

    wire sub_sign  = sub_res[32];           // rs1 < rs2
    wire sub_nzero = (| sub_res[31:0]);     // rs1 != rs2

    // Branch decision. Any opcode not listed below is treated as taken.
    reg br_taken;
    always @(*) begin
        case (br_op)
            `BR_NE:  br_taken = sub_nzero;
            `BR_EQ:  br_taken = ~sub_nzero;
            `BR_LT,
            `BR_LTU: br_taken = sub_sign;
            `BR_GE,
            `BR_GEU: br_taken = ~sub_sign;
            default: br_taken = 1'b1;
        endcase
    end

    wire in_valid = (| branch_req_if.valid);

    // Destination = base + offset, where the base is rs1 for JALR and the
    // current PC for every other opcode.
    wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC;
    wire [31:0] br_dest   = $signed(base_addr) + $signed(branch_req_if.offset);

    // Hold both output registers while the writeback output is valid but not
    // yet accepted.
    wire stall = (~branch_wb_if.ready && (| branch_wb_if.valid));

    // Branch response register: {valid, warp_num, taken, dest}.
    VX_generic_register #(
        .N(1 + `NW_BITS + 1 + 32)
    ) rsp_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (0),
        .in    ({in_valid,            branch_req_if.warp_num, br_taken,            br_dest}),
        .out   ({branch_rsp_if.valid, branch_rsp_if.warp_num, branch_rsp_if.taken, branch_rsp_if.dest})
    );

    // Writeback register: {valid, warp_num, curr_PC, rd, wb, data}, where data
    // is next_PC replicated once per thread.
    VX_generic_register #(
        // No trailing comma after the last parameter override: an empty entry
        // in the parameter assignment list is a syntax error.
        .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
    ) wb_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (0),
        .in    ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}),
        .out   ({branch_wb_if.valid,  branch_wb_if.warp_num,  branch_wb_if.curr_PC,  branch_wb_if.rd,  branch_wb_if.wb,  branch_wb_if.data})
    );

    // The unit accepts a new request whenever its output is not stalled.
    assign branch_req_if.ready = ~stall;

endmodule
|
|
@ -59,6 +59,10 @@
|
|||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||
`endif
|
||||
|
||||
`ifndef EXT_M_ENABLE
|
||||
`define EXT_M_ENABLE 1
|
||||
`endif
|
||||
|
||||
// Configuration Values =======================================================
|
||||
|
||||
`define VENDOR_ID 0
|
||||
|
@ -85,6 +89,8 @@
|
|||
`define CSR_INSTR_L 12'hC02
|
||||
`define CSR_INSTR_H 12'hC82
|
||||
|
||||
`define CSR_MISA 12'h301
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
|
|
|
@ -250,7 +250,7 @@ module VX_core #(
|
|||
assign dcache_snp_req_if.addr = snp_req_addr;
|
||||
assign dcache_snp_req_if.invalidate = snp_req_invalidate;
|
||||
assign dcache_snp_req_if.tag = snp_req_tag;
|
||||
assign snp_req_ready = dcache_snp_req_if.ready;
|
||||
assign snp_req_ready = dcache_snp_req_if.ready;
|
||||
|
||||
assign snp_rsp_valid = dcache_snp_rsp_if.valid;
|
||||
assign snp_rsp_tag = dcache_snp_rsp_if.tag;
|
||||
|
@ -283,18 +283,20 @@ module VX_core #(
|
|||
.icache_dram_rsp_if (icache_dram_rsp_if)
|
||||
);
|
||||
|
||||
// select io address
|
||||
// select io bus
|
||||
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
|
||||
wire io_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
|
||||
wire io_req_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
|
||||
wire io_rsp_select = (| arb_io_rsp_if.valid);
|
||||
|
||||
VX_dcache_arb dcache_io_arb (
|
||||
.req_select (io_select),
|
||||
.in_core_req_if (core_dcache_req_if),
|
||||
.out0_core_req_if (arb_dcache_req_if),
|
||||
.out1_core_req_if (arb_io_req_if),
|
||||
.in0_core_rsp_if (arb_dcache_rsp_if),
|
||||
.in1_core_rsp_if (arb_io_rsp_if),
|
||||
.out_core_rsp_if (core_dcache_rsp_if)
|
||||
VX_dcache_arb dcache_io_arb (
|
||||
.core_req_in_if (core_dcache_req_if),
|
||||
.core_req_out0_if (arb_dcache_req_if),
|
||||
.core_req_out1_if (arb_io_req_if),
|
||||
.core_rsp_in0_if (arb_dcache_rsp_if),
|
||||
.core_rsp_in1_if (arb_io_rsp_if),
|
||||
.core_rsp_out_if (core_dcache_rsp_if),
|
||||
.select_req (io_req_select),
|
||||
.select_rsp (io_rsp_select)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,51 +1,54 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_arb (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire csr_pipe_stall,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_csr_req_if csr_core_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_req_if issued_csr_req_if,
|
||||
|
||||
VX_wb_if csr_pipe_rsp_if,
|
||||
VX_wb_if csr_wb_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if
|
||||
// output
|
||||
VX_csr_req_if csr_req_if,
|
||||
|
||||
// input
|
||||
VX_wb_if csr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
VX_wb_if csr_wb_if
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire pick_core = (| csr_core_req_if.valid);
|
||||
wire core_select = ~(| csr_io_req_if.valid);
|
||||
|
||||
// Mux between core and io
|
||||
assign issued_csr_req_if.valid = pick_core ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
||||
assign issued_csr_req_if.is_csr = pick_core ? csr_core_req_if.is_csr : 1'b1;
|
||||
assign issued_csr_req_if.alu_op = pick_core ? csr_core_req_if.alu_op : (csr_io_req_if.rw ? `ALU_CSR_RW : `ALU_CSR_RS);
|
||||
assign issued_csr_req_if.csr_addr = pick_core ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign issued_csr_req_if.csr_immed = pick_core ? csr_core_req_if.csr_immed : 0;
|
||||
assign issued_csr_req_if.csr_mask = pick_core ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign issued_csr_req_if.is_io = !pick_core;
|
||||
assign issued_csr_req_if.warp_num = csr_core_req_if.warp_num;
|
||||
assign issued_csr_req_if.rd = csr_core_req_if.rd;
|
||||
assign issued_csr_req_if.wb = csr_core_req_if.wb;
|
||||
// requests
|
||||
assign csr_req_if.valid = core_select ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
||||
assign csr_req_if.warp_num = core_select ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.curr_PC = core_select ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = core_select ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = core_select ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = core_select ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = core_select ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = core_select ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = ~core_select;
|
||||
|
||||
assign csr_io_req_if.ready = !(csr_pipe_stall || pick_core);
|
||||
|
||||
// Core Writeback
|
||||
assign csr_wb_if.valid = csr_pipe_rsp_if.valid & {`NUM_THREADS{~csr_pipe_rsp_if.is_io}};
|
||||
assign csr_wb_if.data = csr_pipe_rsp_if.data;
|
||||
assign csr_wb_if.warp_num = csr_pipe_rsp_if.warp_num;
|
||||
assign csr_wb_if.rd = csr_pipe_rsp_if.rd;
|
||||
assign csr_wb_if.wb = csr_pipe_rsp_if.wb;
|
||||
assign csr_wb_if.curr_PC = csr_pipe_rsp_if.curr_PC;
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && core_select;
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && ~core_select;
|
||||
|
||||
// CSR I/O response
|
||||
assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid[0] & csr_pipe_rsp_if.is_io;
|
||||
assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
|
||||
wire x = csr_io_rsp_if.ready;
|
||||
`UNUSED_VAR(x)
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
|
||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||
|
||||
assign csr_wb_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
|
||||
assign csr_wb_if.warp_num = csr_rsp_if.warp_num;
|
||||
assign csr_wb_if.curr_PC = csr_rsp_if.curr_PC;
|
||||
assign csr_wb_if.data = csr_rsp_if.data;
|
||||
assign csr_wb_if.rd = csr_rsp_if.rd;
|
||||
assign csr_wb_if.wb = csr_rsp_if.wb;
|
||||
|
||||
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_wb_if.ready;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -15,7 +15,7 @@ module VX_csr_data #(
|
|||
`IGNORE_WARNINGS_END
|
||||
input wire[`CSR_WIDTH-1:0] write_data,
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
input wire wb_valid
|
||||
input wire notify_commit
|
||||
);
|
||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
||||
|
||||
|
@ -35,7 +35,7 @@ module VX_csr_data #(
|
|||
csr_table[wr_addr] <= write_data;
|
||||
end
|
||||
num_cycles <= num_cycles + 1;
|
||||
if (wb_valid) begin
|
||||
if (notify_commit) begin
|
||||
num_instrs <= num_instrs + 1;
|
||||
end
|
||||
end
|
||||
|
@ -57,6 +57,7 @@ module VX_csr_data #(
|
|||
`CSR_VEND_ID : read_data = `VENDOR_ID;
|
||||
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
|
||||
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
|
||||
`CSR_MISA : read_data = `ISA_CODE;
|
||||
default : read_data = 32'(csr_table[rd_addr]);
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
module VX_csr_io_arb #(
|
||||
parameter NUM_REQUESTS = 1,
|
||||
parameter REQS_BITS = `CLOG2(NUM_REQUESTS)
|
||||
parameter REQS_BITS = `LOG2UP(NUM_REQUESTS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -37,6 +37,7 @@ module VX_csr_io_arb #(
|
|||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (request_id)
|
||||
|
||||
assign out_csr_io_req_valid = in_csr_io_req_valid;
|
||||
assign out_csr_io_req_rw = in_csr_io_req_rw;
|
||||
|
|
|
@ -3,79 +3,86 @@
|
|||
module VX_csr_pipe #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire no_slot_csr,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_wb_if csr_wb_if,
|
||||
output wire stall_gpr_csr
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_wb_if csr_wb_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
input wire notify_commit
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_wb_if csr_pipe_wb_if();
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid_s2;
|
||||
wire[`NW_BITS-1:0] warp_num_s2;
|
||||
wire[4:0] rd_s2;
|
||||
wire[1:0] wb_s2;
|
||||
wire is_csr_s2;
|
||||
wire[`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
||||
wire[31:0] csr_read_data_s2;
|
||||
wire[31:0] csr_updated_data_s2;
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
.csr_rsp_if (csr_pipe_wb_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_wb_if (csr_wb_if)
|
||||
);
|
||||
|
||||
wire[31:0] csr_read_data_unqual;
|
||||
wire[31:0] csr_read_data;
|
||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
||||
wire [31:0] csr_read_data_s2;
|
||||
wire [31:0] csr_updated_data_s2;
|
||||
wire [31:0] csr_read_data_unqual;
|
||||
|
||||
wire is_csr_s2 = (| csr_pipe_wb_if.valid);
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read_addr (csr_req_if.csr_addr),
|
||||
.read_addr (csr_pipe_req_if.csr_addr),
|
||||
.read_data (csr_read_data_unqual),
|
||||
.write_enable (is_csr_s2),
|
||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_addr_s2),
|
||||
.warp_num (csr_req_if.warp_num),
|
||||
.wb_valid (| writeback_if.valid)
|
||||
.warp_num (csr_pipe_req_if.warp_num),
|
||||
.notify_commit (notify_commit)
|
||||
);
|
||||
|
||||
wire car_hazard = (csr_addr_s2 == csr_req_if.csr_addr) & (warp_num_s2 == csr_req_if.warp_num) & |(valid_s2) & is_csr_s2;
|
||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
||||
&& (csr_pipe_wb_if.warp_num == csr_pipe_req_if.warp_num)
|
||||
&& is_csr_s2;
|
||||
|
||||
assign csr_read_data = car_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
reg [31:0] csr_updated_data;
|
||||
|
||||
always @(*) begin
|
||||
case (csr_req_if.alu_op)
|
||||
`ALU_CSR_RW: csr_updated_data = csr_req_if.csr_mask;
|
||||
`ALU_CSR_RS: csr_updated_data = csr_read_data | csr_req_if.csr_mask;
|
||||
`ALU_CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_req_if.csr_mask);
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
case (csr_pipe_req_if.csr_op)
|
||||
`CSR_RW: csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||
`CSR_RS: csr_updated_data = csr_read_data | csr_pipe_req_if.csr_mask;
|
||||
`CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
wire stall = ~csr_pipe_wb_if.ready && (| csr_pipe_wb_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(32 + 32 + 12 + 1 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
|
||||
) csr_reg_s2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
.flush(1'b0),
|
||||
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_addr, csr_req_if.is_io, csr_read_data , csr_updated_data }),
|
||||
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_addr_s2 , csr_wb_if.is_io , csr_read_data_s2, csr_updated_data_s2})
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||
.out ({csr_pipe_wb_if.valid, csr_pipe_wb_if.warp_num, csr_pipe_wb_if.curr_PC, csr_pipe_wb_if.rd, csr_pipe_wb_if.wb, csr_addr_s2, csr_pipe_wb_if.is_io, csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
|
||||
assign csr_wb_if.valid = valid_s2;
|
||||
assign csr_wb_if.warp_num = warp_num_s2;
|
||||
assign csr_wb_if.rd = rd_s2;
|
||||
assign csr_wb_if.wb = wb_s2;
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign csr_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
||||
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
||||
csr_read_data_s2;
|
||||
end
|
||||
assign csr_pipe_wb_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
|
||||
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
||||
csr_read_data_s2;
|
||||
end
|
||||
|
||||
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid);
|
||||
assign csr_pipe_req_if.ready = ~stall;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,48 +1,50 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_dcache_arb (
|
||||
input wire req_select,
|
||||
|
||||
// input request
|
||||
VX_cache_core_req_if in_core_req_if,
|
||||
VX_cache_core_req_if core_req_in_if,
|
||||
|
||||
// output 0 request
|
||||
VX_cache_core_req_if out0_core_req_if,
|
||||
VX_cache_core_req_if core_req_out0_if,
|
||||
|
||||
// output 1 request
|
||||
VX_cache_core_req_if out1_core_req_if,
|
||||
VX_cache_core_req_if core_req_out1_if,
|
||||
|
||||
// input 0 response
|
||||
VX_cache_core_rsp_if in0_core_rsp_if,
|
||||
VX_cache_core_rsp_if core_rsp_in0_if,
|
||||
|
||||
// input 1 response
|
||||
VX_cache_core_rsp_if in1_core_rsp_if,
|
||||
VX_cache_core_rsp_if core_rsp_in1_if,
|
||||
|
||||
// output response
|
||||
VX_cache_core_rsp_if out_core_rsp_if
|
||||
VX_cache_core_rsp_if core_rsp_out_if,
|
||||
|
||||
// bus select
|
||||
input wire select_req,
|
||||
input wire select_rsp
|
||||
);
|
||||
assign out0_core_req_if.valid = in_core_req_if.valid & {`NUM_THREADS{~req_select}};
|
||||
assign out0_core_req_if.rw = in_core_req_if.rw;
|
||||
assign out0_core_req_if.byteen = in_core_req_if.byteen;
|
||||
assign out0_core_req_if.addr = in_core_req_if.addr;
|
||||
assign out0_core_req_if.data = in_core_req_if.data;
|
||||
assign out0_core_req_if.tag = in_core_req_if.tag;
|
||||
// select request
|
||||
assign core_req_out0_if.valid = core_req_in_if.valid & {`NUM_THREADS{~select_req}};
|
||||
assign core_req_out0_if.rw = core_req_in_if.rw;
|
||||
assign core_req_out0_if.byteen = core_req_in_if.byteen;
|
||||
assign core_req_out0_if.addr = core_req_in_if.addr;
|
||||
assign core_req_out0_if.data = core_req_in_if.data;
|
||||
assign core_req_out0_if.tag = core_req_in_if.tag;
|
||||
|
||||
assign out1_core_req_if.valid = in_core_req_if.valid & {`NUM_THREADS{req_select}};
|
||||
assign out1_core_req_if.rw = in_core_req_if.rw;
|
||||
assign out1_core_req_if.byteen = in_core_req_if.byteen;
|
||||
assign out1_core_req_if.addr = in_core_req_if.addr;
|
||||
assign out1_core_req_if.data = in_core_req_if.data;
|
||||
assign out1_core_req_if.tag = in_core_req_if.tag;
|
||||
assign core_req_out1_if.valid = core_req_in_if.valid & {`NUM_THREADS{select_req}};
|
||||
assign core_req_out1_if.rw = core_req_in_if.rw;
|
||||
assign core_req_out1_if.byteen = core_req_in_if.byteen;
|
||||
assign core_req_out1_if.addr = core_req_in_if.addr;
|
||||
assign core_req_out1_if.data = core_req_in_if.data;
|
||||
assign core_req_out1_if.tag = core_req_in_if.tag;
|
||||
|
||||
assign in_core_req_if.ready = req_select ? out1_core_req_if.ready : out0_core_req_if.ready;
|
||||
assign core_req_in_if.ready = select_req ? core_req_out1_if.ready : core_req_out0_if.ready;
|
||||
|
||||
wire rsp_select0 = (| in0_core_rsp_if.valid);
|
||||
|
||||
assign out_core_rsp_if.valid = rsp_select0 ? in0_core_rsp_if.valid : in1_core_rsp_if.valid;
|
||||
assign out_core_rsp_if.data = rsp_select0 ? in0_core_rsp_if.data : in1_core_rsp_if.data;
|
||||
assign out_core_rsp_if.tag = rsp_select0 ? in0_core_rsp_if.tag : in1_core_rsp_if.tag;
|
||||
assign in0_core_rsp_if.ready = out_core_rsp_if.ready && rsp_select0;
|
||||
assign in1_core_rsp_if.ready = out_core_rsp_if.ready && !rsp_select0;
|
||||
// select response
|
||||
assign core_rsp_out_if.valid = select_rsp ? core_rsp_in1_if.valid : core_rsp_in0_if.valid;
|
||||
assign core_rsp_out_if.data = select_rsp ? core_rsp_in1_if.data : core_rsp_in0_if.data;
|
||||
assign core_rsp_out_if.tag = select_rsp ? core_rsp_in1_if.tag : core_rsp_in0_if.tag;
|
||||
assign core_rsp_in0_if.ready = core_rsp_out_if.ready && ~select_rsp;
|
||||
assign core_rsp_in1_if.ready = core_rsp_out_if.ready && select_rsp;
|
||||
|
||||
endmodule
|
|
@ -1,321 +1,279 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_decode(
|
||||
// Fetch Inputs
|
||||
VX_inst_meta_if fd_inst_meta_de,
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Outputs
|
||||
VX_backend_req_if frE_to_bckE_req_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
// inputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_decode_if decode_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire in_valid = (| fd_inst_meta_de.valid);
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.curr_PC;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire in_valid = (| ifetch_rsp_if.valid);
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
wire [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
reg [19:0] upper_imm;
|
||||
reg [31:0] jalx_offset;
|
||||
reg [31:0] src2_imm;
|
||||
|
||||
wire is_itype;
|
||||
wire is_rtype;
|
||||
wire is_stype;
|
||||
wire is_btype;
|
||||
wire is_linst;
|
||||
wire is_jal;
|
||||
wire is_jalr;
|
||||
wire is_lui;
|
||||
wire is_auipc;
|
||||
wire is_csr;
|
||||
wire is_csr_immed;
|
||||
wire is_etype;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [6:0] func7 = instr[31:25];
|
||||
wire [11:0] u_12 = instr[31:20];
|
||||
|
||||
wire is_gpgpu;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_join;
|
||||
wire is_barrier;
|
||||
wire [`NR_BITS-1:0] rd = instr[11:7];
|
||||
wire [`NR_BITS-1:0] rs1 = instr[19:15];
|
||||
wire [`NR_BITS-1:0] rs2 = instr[24:20];
|
||||
|
||||
wire[2:0] func3;
|
||||
wire[6:0] func7;
|
||||
wire[11:0] u_12;
|
||||
|
||||
wire[7:0] jal_b_19_to_12;
|
||||
wire jal_b_11;
|
||||
wire[9:0] jal_b_10_to_1;
|
||||
wire jal_b_20;
|
||||
wire jal_b_0;
|
||||
wire[20:0] jal_unsigned_offset;
|
||||
wire[31:0] jal_1_offset;
|
||||
|
||||
wire[11:0] jalr_immed;
|
||||
wire[31:0] jal_2_offset;
|
||||
|
||||
wire jal_sys_cond1;
|
||||
wire jal_sys_cond2;
|
||||
wire jal_sys_jal;
|
||||
wire[31:0] jal_sys_off;
|
||||
|
||||
wire csr_cond1;
|
||||
wire csr_cond2;
|
||||
|
||||
wire[11:0] alu_tempp;
|
||||
wire alu_shift_i;
|
||||
wire[11:0] alu_shift_i_immed;
|
||||
|
||||
wire[1:0] csr_type;
|
||||
|
||||
reg[4:0] csr_alu;
|
||||
reg[4:0] alu_op;
|
||||
reg[4:0] mul_alu;
|
||||
reg[19:0] temp_upper_immed;
|
||||
reg temp_jal;
|
||||
reg[31:0] temp_jal_offset;
|
||||
reg[31:0] temp_itype_immed;
|
||||
reg[2:0] temp_branch_type;
|
||||
reg temp_branch_stall;
|
||||
|
||||
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
|
||||
|
||||
assign frE_to_bckE_req_if.warp_num = in_warp_num;
|
||||
|
||||
assign curr_opcode = in_instruction[6:0];
|
||||
|
||||
assign frE_to_bckE_req_if.rd = in_instruction[11:7];
|
||||
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
|
||||
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
|
||||
assign func3 = in_instruction[14:12];
|
||||
assign func7 = in_instruction[31:25];
|
||||
assign u_12 = in_instruction[31:20];
|
||||
|
||||
assign frE_to_bckE_req_if.next_PC = in_curr_PC + 32'h4;
|
||||
|
||||
// Write Back signal
|
||||
assign is_rtype = (curr_opcode == `INST_R);
|
||||
assign is_linst = (curr_opcode == `INST_L);
|
||||
assign is_itype = (curr_opcode == `INST_ALU) || is_linst;
|
||||
assign is_stype = (curr_opcode == `INST_S);
|
||||
assign is_btype = (curr_opcode == `INST_B);
|
||||
assign is_jal = (curr_opcode == `INST_JAL);
|
||||
assign is_jalr = (curr_opcode == `INST_JALR);
|
||||
assign is_lui = (curr_opcode == `INST_LUI);
|
||||
assign is_auipc = (curr_opcode == `INST_AUIPC);
|
||||
assign is_csr = (curr_opcode == `INST_SYS) && (func3 != 0);
|
||||
assign is_csr_immed = is_csr && (func3[2] == 1);
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `INST_GPGPU);
|
||||
|
||||
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
|
||||
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
|
||||
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
|
||||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
assign join_if.is_join = is_join && in_valid;
|
||||
assign join_if.warp_num = in_warp_num;
|
||||
|
||||
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
|
||||
assign frE_to_bckE_req_if.is_tmc = is_tmc;
|
||||
assign frE_to_bckE_req_if.is_split = is_split;
|
||||
assign frE_to_bckE_req_if.is_barrier = is_barrier;
|
||||
|
||||
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
||||
assign frE_to_bckE_req_if.is_csr = is_csr;
|
||||
|
||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`WB_NO;
|
||||
|
||||
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
// MEM signals
|
||||
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `BYTE_EN_NO;
|
||||
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `BYTE_EN_NO;
|
||||
|
||||
// UPPER IMMEDIATE
|
||||
// opcode types
|
||||
wire is_rtype = (opcode == `INST_R);
|
||||
wire is_ltype = (opcode == `INST_L);
|
||||
wire is_itype = (opcode == `INST_I);
|
||||
wire is_stype = (opcode == `INST_S);
|
||||
wire is_btype = (opcode == `INST_B);
|
||||
wire is_jal = (opcode == `INST_JAL);
|
||||
wire is_jalr = (opcode == `INST_JALR);
|
||||
wire is_lui = (opcode == `INST_LUI);
|
||||
wire is_auipc = (opcode == `INST_AUIPC);
|
||||
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
|
||||
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
|
||||
wire is_gpu = (opcode == `INST_GPU);
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
|
||||
// upper immediate
|
||||
always @(*) begin
|
||||
case (curr_opcode)
|
||||
`INST_LUI: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
`INST_AUIPC: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
|
||||
default: temp_upper_immed = 20'h0;
|
||||
endcase // curr_opcode
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
|
||||
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
assign jal_b_11 = in_instruction[20];
|
||||
assign jal_b_10_to_1 = in_instruction[30:21];
|
||||
assign jal_b_20 = in_instruction[31];
|
||||
assign jal_b_0 = 1'b0;
|
||||
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
|
||||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
|
||||
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
assign jal_sys_cond1 = (func3 == 3'h0);
|
||||
assign jal_sys_cond2 = (u_12 < 12'h2);
|
||||
|
||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||
|
||||
// JAL
|
||||
always @(*) begin
|
||||
case (curr_opcode)
|
||||
`INST_JAL:
|
||||
begin
|
||||
temp_jal = in_valid;
|
||||
temp_jal_offset = jal_1_offset;
|
||||
end
|
||||
`INST_JALR:
|
||||
begin
|
||||
temp_jal = in_valid;
|
||||
temp_jal_offset = jal_2_offset;
|
||||
end
|
||||
`INST_SYS:
|
||||
begin
|
||||
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid));
|
||||
temp_jal = jal_sys_jal && in_valid;
|
||||
temp_jal_offset = jal_sys_off;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
temp_jal = 1'b0;
|
||||
temp_jal_offset = 32'hdeadbeef;
|
||||
end
|
||||
case (opcode)
|
||||
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
|
||||
`INST_AUIPC: upper_imm = {func7, rs2, rs1, func3};
|
||||
default: upper_imm = 20'h0;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.is_jal = is_jal;
|
||||
assign frE_to_bckE_req_if.jal = temp_jal;
|
||||
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
|
||||
|
||||
// ecall/ebreak
|
||||
assign is_etype = (curr_opcode == `INST_SYS) && jal_sys_jal;
|
||||
assign frE_to_bckE_req_if.is_etype = is_etype;
|
||||
|
||||
// CSR
|
||||
|
||||
assign csr_cond1 = func3 != 3'h0;
|
||||
assign csr_cond2 = u_12 >= 12'h2;
|
||||
|
||||
assign frE_to_bckE_req_if.csr_addr = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
|
||||
// ITYPE IMMED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
always @(*) begin
|
||||
case (curr_opcode)
|
||||
`INST_ALU: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
`INST_S: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
|
||||
`INST_L: temp_itype_immed = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
|
||||
default: temp_itype_immed = 32'hdeadbeef;
|
||||
// JAL
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_JAL: jalx_offset = jal_offset;
|
||||
`INST_JALR: jalx_offset = jalr_offset;
|
||||
default: jalx_offset = 32'd4;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
|
||||
end
|
||||
|
||||
// I-type immediate
|
||||
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
always @(*) begin
|
||||
case (curr_opcode)
|
||||
case (opcode)
|
||||
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
`INST_S: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
`INST_L: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
default: src2_imm = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
// BRANCH
|
||||
always @(*) begin
|
||||
br_op = `BR_OTHER;
|
||||
case (opcode)
|
||||
`INST_B: begin
|
||||
// $display("BRANCH IN DECODE");
|
||||
temp_branch_stall = in_valid;
|
||||
case (func3)
|
||||
3'h0: temp_branch_type = `BR_EQ;
|
||||
3'h1: temp_branch_type = `BR_NE;
|
||||
3'h4: temp_branch_type = `BR_LT;
|
||||
3'h5: temp_branch_type = `BR_GT;
|
||||
3'h6: temp_branch_type = `BR_LTU;
|
||||
3'h7: temp_branch_type = `BR_GTU;
|
||||
default: temp_branch_type = `BR_NO;
|
||||
3'h0: br_op = `BR_EQ;
|
||||
3'h1: br_op = `BR_NE;
|
||||
3'h4: br_op = `BR_LT;
|
||||
3'h5: br_op = `BR_GE;
|
||||
3'h6: br_op = `BR_LTU;
|
||||
3'h7: br_op = `BR_GEU;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_JAL: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = in_valid;
|
||||
end
|
||||
`INST_JALR: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = in_valid;
|
||||
end
|
||||
default: begin
|
||||
temp_branch_type = `BR_NO;
|
||||
temp_branch_stall = 1'b0;
|
||||
`INST_JAL: br_op = `BR_JAL;
|
||||
`INST_JALR: br_op = `BR_JALR;
|
||||
`INST_SYS: begin
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign frE_to_bckE_req_if.branch_type = temp_branch_type;
|
||||
|
||||
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && in_valid;
|
||||
assign wstall_if.warp_num = in_warp_num;
|
||||
|
||||
|
||||
// ALU
|
||||
// ALU operation select.
// Priority: LUI, then AUIPC, then I-type/R-type arithmetic decoded from
// func3 (func7 picks SUB for R-type and the arithmetic right shift).
// Anything else resolves to `ALU_OTHER.
always @(*) begin
    if (is_lui) begin
        alu_op = `ALU_LUI;
    end else if (is_auipc) begin
        alu_op = `ALU_AUIPC;
    end else if (is_itype || is_rtype) begin
        case (func3)
            3'd0:    alu_op = (is_rtype && func7 == 7'h20) ? `ALU_SUB : `ALU_ADD;
            3'd1:    alu_op = `ALU_SLL;
            3'd2:    alu_op = `ALU_SLT;
            3'd3:    alu_op = `ALU_SLTU;
            3'd4:    alu_op = `ALU_XOR;
            3'd5:    alu_op = (func7 == 7'h0) ? `ALU_SRL : `ALU_SRA;
            3'd6:    alu_op = `ALU_OR;
            3'd7:    alu_op = `ALU_AND;
            default: alu_op = `ALU_OTHER; // only reachable with unknown func3 bits
        endcase
    end else begin
        alu_op = `ALU_OTHER;
    end
end
|
||||
|
||||
// MUL
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
case (func3)
|
||||
3'h0: alu_op = (curr_opcode == `INST_ALU) ? `ALU_ADD : (func7 == 7'h0 ? `ALU_ADD : `ALU_SUB);
|
||||
3'h1: alu_op = `ALU_SLLA;
|
||||
3'h2: alu_op = `ALU_SLT;
|
||||
3'h3: alu_op = `ALU_SLTU;
|
||||
3'h4: alu_op = `ALU_XOR;
|
||||
3'h5: alu_op = (func7 == 7'h0) ? `ALU_SRL : `ALU_SRA;
|
||||
3'h6: alu_op = `ALU_OR;
|
||||
3'h7: alu_op = `ALU_AND;
|
||||
default: alu_op = `ALU_NO;
|
||||
3'h0: mul_op = `MUL_MUL;
|
||||
3'h1: mul_op = `MUL_MULH;
|
||||
3'h2: mul_op = `MUL_MULHSU;
|
||||
3'h3: mul_op = `MUL_MULHU;
|
||||
3'h4: mul_op = `MUL_DIV;
|
||||
3'h5: mul_op = `MUL_DIVU;
|
||||
3'h6: mul_op = `MUL_REM;
|
||||
3'h7: mul_op = `MUL_REMU;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// LSU
|
||||
wire is_lsu = (is_ltype || is_stype);
|
||||
assign lsu_op = {is_stype, func3};
|
||||
|
||||
// CSR
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
// CSR operation select: the two low bits of func3 choose between
// read/write, read/set and read/clear; value 0 maps to `CSR_OTHER.
always @(*) begin
    case (func3[1:0])
        2'd1:    csr_op = `CSR_RW;
        2'd2:    csr_op = `CSR_RS;
        2'd3:    csr_op = `CSR_RC;
        default: csr_op = `CSR_OTHER;
    endcase
end
|
||||
|
||||
// GPU
|
||||
always @(*) begin
|
||||
gpu_op = `GPU_OTHER;
|
||||
case (func3)
|
||||
3'h0: mul_alu = `ALU_MUL;
|
||||
3'h1: mul_alu = `ALU_MULH;
|
||||
3'h2: mul_alu = `ALU_MULHSU;
|
||||
3'h3: mul_alu = `ALU_MULHU;
|
||||
3'h4: mul_alu = `ALU_DIV;
|
||||
3'h5: mul_alu = `ALU_DIVU;
|
||||
3'h6: mul_alu = `ALU_REM;
|
||||
3'h7: mul_alu = `ALU_REMU;
|
||||
default: mul_alu = `ALU_NO;
|
||||
3'h0: gpu_op = `GPU_TMC;
|
||||
3'h1: gpu_op = `GPU_WSPAWN;
|
||||
3'h2: gpu_op = `GPU_SPLIT;
|
||||
3'h3: gpu_op = `GPU_JOIN;
|
||||
3'h4: gpu_op = `GPU_BAR;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign csr_type = func3[1:0];
|
||||
VX_decode_if decode_tmp_if();
|
||||
|
||||
always @(*) begin
|
||||
case (csr_type)
|
||||
2'h1: csr_alu = `ALU_CSR_RW;
|
||||
2'h2: csr_alu = `ALU_CSR_RS;
|
||||
2'h3: csr_alu = `ALU_CSR_RC;
|
||||
default: csr_alu = `ALU_NO;
|
||||
endcase
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
||||
|
||||
assign decode_tmp_if.ex_type = is_br ? `EX_BR :
|
||||
is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_gpu ? `EX_GPU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_tmp_if.instr_op = is_br ? `OP_BITS'(br_op) :
|
||||
is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
assign decode_tmp_if.rd = rd;
|
||||
|
||||
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
|
||||
|
||||
assign decode_tmp_if.rs2 = rs2;
|
||||
|
||||
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
is_csr ? 32'(u_12) :
|
||||
src2_imm;
|
||||
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
||||
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
// rs1 is flagged as a source only for instruction classes that read it.
// For CSR instructions that means the register form only: the immediate
// form carries a 5-bit literal in instr[19:15] instead. The previous bare
// `~is_csr_imm` term was true for every non-CSR instruction as well, so
// JAL/AUIPC/ECALL were reported as rs1 readers whenever those instruction
// bits happened to be non-zero.
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
                            && (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || (is_csr && ~is_csr_imm) || is_gpu);
|
||||
|
||||
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
|
||||
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
|
||||
|
||||
assign decode_tmp_if.wb = (rd == 0) ? `WB_NO : // disable writeback to r0
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
(is_jal || is_jalr || is_jals) ? `WB_JAL :
|
||||
is_ltype ? `WB_MEM :
|
||||
`WB_NO;
|
||||
|
||||
assign join_if.is_join = is_gpu && (gpu_op == `GPU_JOIN) && in_valid;
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
assign wstall_if.wstall = (is_br || is_gpu) && in_valid;
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
    // Pipeline trace: on each clock edge where a valid instruction is being
    // accepted (decode register not stalled), print its decoded fields and
    // check that the decoder did not fall back to an *_OTHER operation.
    always @(posedge clk) begin
        if (~stall && (| decode_tmp_if.valid)) begin
            $write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=",
                   $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
            print_ex_type(decode_tmp_if.ex_type);
            $write(", op=");
            print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
            $write(", wb=");
            print_wb(decode_tmp_if.wb);
            $write(", rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n",
                   decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm,
                   decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm,
                   decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);

            // trap unsupported instructions (stall is already known to be low here)
            assert((decode_tmp_if.ex_type != `EX_ALU) || (`ALU_OP(decode_tmp_if.instr_op) != `ALU_OTHER));
            assert((decode_tmp_if.ex_type != `EX_BR)  || (`BR_OP(decode_tmp_if.instr_op)  != `BR_OTHER));
            assert((decode_tmp_if.ex_type != `EX_CSR) || (`CSR_OP(decode_tmp_if.instr_op) != `CSR_OTHER));
            assert((decode_tmp_if.ex_type != `EX_GPU) || (`GPU_OP(decode_tmp_if.instr_op) != `GPU_OTHER));
        end
    end
`endif
|
||||
|
||||
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BR_LTU) ? `ALU_SUB : `ALU_SUBU) :
|
||||
is_lui ? `ALU_LUI :
|
||||
is_auipc ? `ALU_AUIPC :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ALU_ADD :
|
||||
alu_op;
|
||||
|
||||
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
endmodule
|
|
@ -64,88 +64,159 @@
|
|||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
|
||||
|
||||
`define NUM_GPRS 32
|
||||
`define NUM_REGS 32
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
||||
`define CSR_ADDR_SIZE 12
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define DIV_LATENCY 22
|
||||
`define DIV_LATENCY 2
|
||||
|
||||
`define MUL_LATENCY 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BYTE_EN_NO 3'h7
|
||||
`define BYTE_EN_SB 3'h0
|
||||
`define BYTE_EN_SH 3'h1
|
||||
`define BYTE_EN_SW 3'h2
|
||||
`define BYTE_EN_UB 3'h4
|
||||
`define BYTE_EN_UH 3'h5
|
||||
`define BYTE_EN_BITS 3
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
`define INST_JALR 7'b1100111
|
||||
`define INST_B 7'b1100011
|
||||
`define INST_L 7'b0000011
|
||||
`define INST_S 7'b0100011
|
||||
`define INST_I 7'b0010011
|
||||
`define INST_R 7'b0110011
|
||||
`define INST_F 7'b0001111
|
||||
`define INST_SYS 7'b1110011
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
`define OP_BITS 4
|
||||
|
||||
`define INST_R 7'd051
|
||||
`define INST_L 7'd003
|
||||
`define INST_ALU 7'd019
|
||||
`define INST_S 7'd035
|
||||
`define INST_B 7'd099
|
||||
`define INST_LUI 7'd055
|
||||
`define INST_AUIPC 7'd023
|
||||
`define INST_JAL 7'd111
|
||||
`define INST_JALR 7'd103
|
||||
`define INST_SYS 7'd115
|
||||
`define INST_GPGPU 7'd107
|
||||
`define ALU_ADD 4'h0
|
||||
`define ALU_SUB 4'h1
|
||||
`define ALU_SLL 4'h2
|
||||
`define ALU_SRL 4'h3
|
||||
`define ALU_SRA 4'h4
|
||||
`define ALU_SLT 4'h5
|
||||
`define ALU_SLTU 4'h6
|
||||
`define ALU_XOR 4'h7
|
||||
`define ALU_OR 4'h8
|
||||
`define ALU_AND 4'h9
|
||||
`define ALU_LUI 4'hA
|
||||
`define ALU_AUIPC 4'hB
|
||||
`define ALU_OTHER 4'hF
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
|
||||
`define RS2_IMMED 1
|
||||
`define RS2_REG 0
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define BR_NO 3'h0
|
||||
`define BR_EQ 3'h1
|
||||
`define BR_NE 3'h2
|
||||
`define BR_LT 3'h3
|
||||
`define BR_GT 3'h4
|
||||
`define BR_LTU 3'h5
|
||||
`define BR_GTU 3'h6
|
||||
`define BR_EQ 4'h0
|
||||
`define BR_NE 4'h1
|
||||
`define BR_LT 4'h2
|
||||
`define BR_GE 4'h3
|
||||
`define BR_LTU 4'h4
|
||||
`define BR_GEU 4'h5
|
||||
`define BR_JAL 4'h6
|
||||
`define BR_JALR 4'h7
|
||||
`define BR_ECALL 4'h8
|
||||
`define BR_EBREAK 4'h9
|
||||
`define BR_MRET 4'hA
|
||||
`define BR_SRET 4'hB
|
||||
`define BR_DRET 4'hC
|
||||
`define BR_OTHER 4'hF
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
|
||||
`define ALU_NO 5'd15
|
||||
`define ALU_ADD 5'd00
|
||||
`define ALU_SUB 5'd01
|
||||
`define ALU_SLLA 5'd02
|
||||
`define ALU_SLT 5'd03
|
||||
`define ALU_SLTU 5'd04
|
||||
`define ALU_XOR 5'd05
|
||||
`define ALU_SRL 5'd06
|
||||
`define ALU_SRA 5'd07
|
||||
`define ALU_OR 5'd08
|
||||
`define ALU_AND 5'd09
|
||||
`define ALU_SUBU 5'd10
|
||||
`define ALU_LUI 5'd11
|
||||
`define ALU_AUIPC 5'd12
|
||||
`define ALU_CSR_RW 5'd13
|
||||
`define ALU_CSR_RS 5'd14
|
||||
`define ALU_CSR_RC 5'd15
|
||||
`define ALU_MUL 5'd16
|
||||
`define ALU_MULH 5'd17
|
||||
`define ALU_MULHSU 5'd18
|
||||
`define ALU_MULHU 5'd19
|
||||
`define ALU_DIV 5'd20
|
||||
`define ALU_DIVU 5'd21
|
||||
`define ALU_REM 5'd22
|
||||
`define ALU_REMU 5'd23
|
||||
`define BYTEEN_SB 3'h0
|
||||
`define BYTEEN_SH 3'h1
|
||||
`define BYTEEN_SW 3'h2
|
||||
`define BYTEEN_UB 3'h4
|
||||
`define BYTEEN_UH 3'h5
|
||||
`define BYTEEN_BITS 3
|
||||
`define LSU_BITS 4
|
||||
`define LSU_RW(x) x[3]
|
||||
`define LSU_BE(x) x[2:0]
|
||||
|
||||
`define CSR_RW 2'h0
|
||||
`define CSR_RS 2'h1
|
||||
`define CSR_RC 2'h2
|
||||
`define CSR_OTHER 2'h3
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_BR 3'h2
|
||||
`define EX_MUL 3'h3
|
||||
`define EX_LSU 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_CSR 3'h6
|
||||
`define EX_GPU 3'h7
|
||||
`define EX_BITS 3
|
||||
|
||||
`define WB_NO 2'h0
|
||||
`define WB_ALU 2'h1
|
||||
`define WB_MEM 2'h2
|
||||
`define WB_JAL 2'h3
|
||||
`define WB_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS)
|
||||
// ISA extension bitmask: bit i is set when the extension named by the i-th
// letter of the alphabet (A = bit 0 ... Z = bit 25) is present. As written,
// I, M, U and X are enabled.
// NOTE(review): the layout looks like the RISC-V misa "Extensions" field -
// confirm against the CSR logic that consumes this macro.
// NOTE(review): every continuation line ends with a `//` comment followed by
// a backslash; verify that each target tool still treats it as a continuation.
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
||||
| (0 << 2) // C - Compressed extension \
|
||||
| (0 << 3) // D - Double precision floating-point extension \
|
||||
| (0 << 4) // E - RV32E base ISA \
|
||||
| (0 << 5) // F - Single precision floating-point extension \
|
||||
| (0 << 6) // G - Additional standard extensions present \
|
||||
| (0 << 7) // H - Hypervisor mode implemented \
|
||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||
| (0 << 9) // J - Reserved \
|
||||
| (0 << 10) // K - Reserved \
|
||||
| (0 << 11) // L - Tentatively reserved for Decimal Floating-Point extension \
|
||||
| (1 << 12) // M - Integer Multiply/Divide extension \
|
||||
| (0 << 13) // N - User level interrupts supported \
|
||||
| (0 << 14) // O - Reserved \
|
||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||
| (0 << 16) // Q - Quad-precision floating-point extension \
|
||||
| (0 << 17) // R - Reserved \
|
||||
| (0 << 18) // S - Supervisor mode implemented \
|
||||
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
|
||||
| (1 << 20) // U - User mode implemented \
|
||||
| (0 << 21) // V - Tentatively reserved for Vector extension \
|
||||
| (0 << 22) // W - Reserved \
|
||||
| (1 << 23) // X - Non-standard extensions present \
|
||||
| (0 << 24) // Y - Reserved \
|
||||
| (0 << 25) // Z - Reserved
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + `WB_BITS + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
`endif
|
||||
|
@ -288,9 +359,129 @@
|
|||
`define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH
|
||||
`define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
|
||||
`define VX_CSR_ID_WIDTH `CLOG2(`NUM_CLUSTERS * `NUM_CORES)
|
||||
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
|
||||
|
||||
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
// VX_DEFINE
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Debug helper: writes the short name of an execution-unit type to the
// simulator output (no newline). Unrecognised values print "NOP".
task print_ex_type;
    input [`EX_BITS-1:0] ex;
    begin
        if      (ex == `EX_ALU) $write("ALU");
        else if (ex == `EX_BR)  $write("BR");
        else if (ex == `EX_LSU) $write("LSU");
        else if (ex == `EX_CSR) $write("CSR");
        else if (ex == `EX_MUL) $write("MUL");
        else if (ex == `EX_FPU) $write("FPU");
        else if (ex == `EX_GPU) $write("GPU");
        else                    $write("NOP");
    end
endtask
|
||||
|
||||
// Debug helper: writes the mnemonic of a decoded operation to the simulator
// output (no newline).
//   ex - execution-unit type; selects which opcode table is used
//   op - unit-specific opcode, narrowed to that unit's *_BITS width
// Opcodes that a unit does not define print "?"; execution-unit types with
// no table (e.g. `EX_NOP, `EX_FPU) print nothing.
task print_instr_op;
    input [`EX_BITS-1:0] ex;
    input [`OP_BITS-1:0] op;
    begin
        case (ex)
        `EX_ALU: begin
            case (`ALU_BITS'(op))
                `ALU_ADD:   $write("ADD");
                `ALU_SUB:   $write("SUB");
                `ALU_SLL:   $write("SLL");
                `ALU_SRL:   $write("SRL");
                `ALU_SRA:   $write("SRA");
                `ALU_SLT:   $write("SLT");
                `ALU_SLTU:  $write("SLTU");
                `ALU_XOR:   $write("XOR");
                `ALU_OR:    $write("OR");
                `ALU_AND:   $write("AND");
                `ALU_LUI:   $write("LUI");
                `ALU_AUIPC: $write("AUIPC");
                default:    $write("?");
            endcase
        end
        `EX_BR: begin
            case (`BR_BITS'(op))
                `BR_EQ:     $write("EQ");
                `BR_NE:     $write("NE");
                `BR_LT:     $write("LT");
                `BR_GE:     $write("GE");
                `BR_LTU:    $write("LTU");
                `BR_GEU:    $write("GEU");
                `BR_JAL:    $write("JAL");
                `BR_JALR:   $write("JALR");
                `BR_ECALL:  $write("ECALL");
                `BR_EBREAK: $write("EBREAK");
                `BR_MRET:   $write("MRET");
                `BR_SRET:   $write("SRET");
                `BR_DRET:   $write("DRET");
                default:    $write("?");
            endcase
        end
        `EX_MUL: begin
            case (`MUL_BITS'(op))
                `MUL_MUL:    $write("MUL");
                `MUL_MULH:   $write("MULH");
                `MUL_MULHSU: $write("MULHSU");
                `MUL_MULHU:  $write("MULHU");
                `MUL_DIV:    $write("DIV");
                `MUL_DIVU:   $write("DIVU");
                `MUL_REM:    $write("REM");
                `MUL_REMU:   $write("REMU");
                default:     $write("?");
            endcase
        end
        `EX_LSU: begin
            // op = {is_store, func3}. Only the encodings RV32I defines are
            // named; stores have no unsigned variants, so func3 = 4/5 with
            // the store bit set is reported as unknown rather than as the
            // non-existent "SBU"/"SHU".
            case (`LSU_BITS'(op))
                4'b0000: $write("LB");
                4'b0001: $write("LH");
                4'b0010: $write("LW");
                4'b0100: $write("LBU");
                4'b0101: $write("LHU");
                4'b1000: $write("SB");
                4'b1001: $write("SH");
                4'b1010: $write("SW");
                default: $write("?");
            endcase
        end
        `EX_CSR: begin
            case (`CSR_BITS'(op))
                `CSR_RW: $write("CSRW");
                `CSR_RS: $write("CSRS");
                `CSR_RC: $write("CSRC");
                default: $write("?");
            endcase
        end
        `EX_GPU: begin
            case (`GPU_BITS'(op))
                `GPU_TMC:    $write("TMC");
                `GPU_WSPAWN: $write("WSPAWN");
                `GPU_SPLIT:  $write("SPLIT");
                `GPU_JOIN:   $write("JOIN");
                `GPU_BAR:    $write("BAR");
                default:     $write("?");
            endcase
        end
        default:;
        endcase
    end
endtask
|
||||
|
||||
// Debug helper: writes the name of a writeback source to the simulator
// output (no newline). Any value other than ALU/MEM/JAL prints "NO".
task print_wb;
    input [`WB_BITS-1:0] wb;
    begin
        if      (wb == `WB_ALU) $write("ALU");
        else if (wb == `WB_MEM) $write("MEM");
        else if (wb == `WB_JAL) $write("JAL");
        else                    $write("NO");
    end
endtask
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,147 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Execute unit: instantiates one VX_alu_unit per thread and derives the
// writeback, JAL and branch responses from the request and the lane results.
//
//  - delay is raised when any ALU lane stalls or when no_slot_exec is set.
//  - The JAL and branch responses are each passed through a
//    VX_generic_register (stall and flush tied low) before reaching the
//    jal_rsp_if / branch_rsp_if ports.
module VX_exec_unit (
    input wire              clk,
    input wire              reset,

    // Request
    VX_exec_unit_req_if     exec_unit_req_if,

    // Output
    VX_wb_if                inst_exec_wb_if,
    VX_jal_rsp_if           jal_rsp_if,
    VX_branch_rsp_if        branch_rsp_if,

    input wire              no_slot_exec,
    output wire             delay
);

    // Fixed-width local copies of the request fields used below.
    wire [`NUM_THREADS-1:0][31:0] req_src_a       = exec_unit_req_if.a_reg_data;
    wire [`NUM_THREADS-1:0][31:0] req_src_b       = exec_unit_req_if.b_reg_data;
    wire [4:0]                    req_alu_op      = exec_unit_req_if.alu_op;
    wire                          req_rs2_src     = exec_unit_req_if.rs2_src;
    wire [31:0]                   req_itype_immed = exec_unit_req_if.itype_immed;
    wire [19:0]                   req_upper_immed = exec_unit_req_if.upper_immed;
    wire                          req_jal         = exec_unit_req_if.jal;
    wire [31:0]                   req_jal_offset  = exec_unit_req_if.jal_offset;
    wire [31:0]                   req_curr_PC     = exec_unit_req_if.curr_PC;

`DEBUG_BEGIN
    // Not consumed by any logic in this module.
    wire [2:0] in_branch_type = exec_unit_req_if.branch_type;
`DEBUG_END

    wire [`NUM_THREADS-1:0][31:0] alu_result;
    wire [`NUM_THREADS-1:0]       alu_stall;
    wire [`NUM_THREADS-1:0][31:0] duplicate_PC_data;

    genvar i;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin
            // One ALU lane per thread; only the register operands differ
            // between lanes.
            VX_alu_unit alu_unit (
                .clk         (clk),
                .reset       (reset),
                .alu_op      (req_alu_op),
                .curr_PC     (req_curr_PC),
                .src_a       (req_src_a[i]),
                .src_b       (req_src_b[i]),
                .src_rs2     (req_rs2_src),
                .itype_immed (req_itype_immed),
                .upper_immed (req_upper_immed),
                .alu_result  (alu_result[i]),
                .alu_stall   (alu_stall[i])
            );

            // Every lane writes back the same next_PC value for a JAL.
            assign duplicate_PC_data[i] = exec_unit_req_if.next_PC;
        end
    endgenerate

    wire internal_stall = (| alu_stall);

    assign delay = no_slot_exec || internal_stall;

    // Index of the thread (chosen by the priority encoder from the valid
    // mask) whose operands/result are used for JAL and branch resolution.
    wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;

    VX_priority_encoder #(
        .N(`NUM_THREADS)
    ) choose_alu_result (
        .data_in  (exec_unit_req_if.valid),
        .data_out (jal_branch_use_index),
        `UNUSED_PIN (valid_out)
    );

    wire [31:0] branch_use_alu_result = alu_result[jal_branch_use_index];

    // Branch direction, decoded from the selected lane's ALU result.
    reg temp_branch_dir;

    always @(*) begin
        case (exec_unit_req_if.branch_type)
            `BR_EQ:
                temp_branch_dir = (branch_use_alu_result == 0);
            `BR_NE:
                temp_branch_dir = (branch_use_alu_result != 0);
            `BR_LT, `BR_LTU:
                temp_branch_dir = (branch_use_alu_result[31] != 0);
            `BR_GT, `BR_GTU:
                temp_branch_dir = (branch_use_alu_result[31] == 0);
            default: // includes `BR_NO
                temp_branch_dir = 0;
        endcase
    end

    VX_jal_rsp_if    jal_rsp_temp_if();
    VX_branch_rsp_if branch_rsp_temp_if();

    // Actual Writeback
    // valid is masked while any lane is stalled; a JAL writes next_PC in
    // place of the ALU result.
    assign inst_exec_wb_if.valid    = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
    assign inst_exec_wb_if.warp_num = exec_unit_req_if.warp_num;
    assign inst_exec_wb_if.curr_PC  = req_curr_PC;
    assign inst_exec_wb_if.rd       = exec_unit_req_if.rd;
    assign inst_exec_wb_if.wb       = exec_unit_req_if.wb;
    assign inst_exec_wb_if.data     = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;

    // Jal rsp
    assign jal_rsp_temp_if.valid    = req_jal;
    assign jal_rsp_temp_if.warp_num = exec_unit_req_if.warp_num;
    assign jal_rsp_temp_if.dest     = $signed(req_src_a[jal_branch_use_index]) + $signed(req_jal_offset);

    // Branch rsp
    assign branch_rsp_temp_if.valid    = (exec_unit_req_if.branch_type != `BR_NO) && (| exec_unit_req_if.valid);
    assign branch_rsp_temp_if.warp_num = exec_unit_req_if.warp_num;
    assign branch_rsp_temp_if.dir      = temp_branch_dir;
    assign branch_rsp_temp_if.dest     = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset

    // Output register for the JAL response.
    VX_generic_register #(
        .N(33 + `NW_BITS-1 + 1)
    ) jal_reg (
        .clk   (clk),
        .reset (reset),
        .stall (1'b0),
        .flush (1'b0),
        .in    ({jal_rsp_temp_if.valid, jal_rsp_temp_if.dest, jal_rsp_temp_if.warp_num}),
        .out   ({jal_rsp_if.valid     , jal_rsp_if.dest     , jal_rsp_if.warp_num})
    );

    // Output register for the branch response.
    VX_generic_register #(
        .N(34 + `NW_BITS-1 + 1)
    ) branch_reg (
        .clk   (clk),
        .reset (reset),
        .stall (1'b0),
        .flush (1'b0),
        .in    ({branch_rsp_temp_if.valid, branch_rsp_temp_if.dir, branch_rsp_temp_if.warp_num, branch_rsp_temp_if.dest}),
        .out   ({branch_rsp_if.valid     , branch_rsp_if.dir     , branch_rsp_if.warp_num     , branch_rsp_if.dest     })
    );

endmodule : VX_exec_unit
|
140
hw/rtl/VX_execute.v
Normal file
140
hw/rtl/VX_execute.v
Normal file
|
@ -0,0 +1,140 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Execute stage top level.
//
// VX_gpr_stage receives execute_if and writeback_if and drives six request
// interfaces, one per functional unit instantiated here (ALU, branch, LSU,
// CSR, MUL, GPU). The ALU, branch, LSU, CSR and MUL units each drive their
// own VX_wb_if output; the GPU unit drives warp_ctl_if instead.
//
// ebreak is asserted while a valid branch request carries `BR_EBREAK or
// `BR_ECALL.
module VX_execute #(
    parameter CORE_ID = 0
) (
    `SCOPE_SIGNALS_LSU_IO
    `SCOPE_SIGNALS_BE_IO

    input wire              clk,
    input wire              reset,

    // CSR io interface
    VX_csr_io_req_if        csr_io_req_if,
    VX_csr_io_rsp_if        csr_io_rsp_if,

    // Dcache interface
    VX_cache_core_req_if    dcache_req_if,
    VX_cache_core_rsp_if    dcache_rsp_if,

    // inputs
    VX_execute_if           execute_if,
    VX_wb_if                writeback_if,

    // outputs
    VX_branch_rsp_if        branch_rsp_if,
    VX_warp_ctl_if          warp_ctl_if,
    VX_wb_if                alu_wb_if,
    VX_wb_if                branch_wb_if,
    VX_wb_if                lsu_wb_if,
    VX_wb_if                csr_wb_if,
    VX_wb_if                mul_wb_if,

    input wire              notify_commit,
    output wire             ebreak
);

    // Per-unit request channels produced by the GPR stage.
    VX_alu_req_if       alu_req_if();
    VX_branch_req_if    branch_req_if();
    VX_csr_req_if       csr_req_if();
    VX_lsu_req_if       lsu_req_if();
    VX_mul_req_if       mul_req_if();
    VX_gpu_req_if       gpu_req_if();

    // ---- register read + request routing ----
    VX_gpr_stage #(
        .CORE_ID(CORE_ID)
    ) gpr_stage (
        .clk            (clk),
        .reset          (reset),
        .execute_if     (execute_if),
        .writeback_if   (writeback_if),
        .alu_req_if     (alu_req_if),
        .branch_req_if  (branch_req_if),
        .lsu_req_if     (lsu_req_if),
        .csr_req_if     (csr_req_if),
        .mul_req_if     (mul_req_if),
        .gpu_req_if     (gpu_req_if)
    );

    // ---- functional units ----
    VX_alu_unit #(
        .CORE_ID(CORE_ID)
    ) alu_unit (
        .clk            (clk),
        .reset          (reset),
        .alu_req_if     (alu_req_if),
        .alu_wb_if      (alu_wb_if)
    );

    VX_branch_unit #(
        .CORE_ID(CORE_ID)
    ) branch_unit (
        .clk            (clk),
        .reset          (reset),
        .branch_req_if  (branch_req_if),
        .branch_wb_if   (branch_wb_if),
        .branch_rsp_if  (branch_rsp_if)
    );

    VX_lsu_unit #(
        .CORE_ID(CORE_ID)
    ) lsu_unit (
        `SCOPE_SIGNALS_LSU_BIND
        .clk            (clk),
        .reset          (reset),
        .lsu_req_if     (lsu_req_if),
        .lsu_wb_if      (lsu_wb_if),
        .dcache_req_if  (dcache_req_if),
        .dcache_rsp_if  (dcache_rsp_if)
    );

    VX_csr_pipe #(
        .CORE_ID(CORE_ID)
    ) csr_pipe (
        .clk            (clk),
        .reset          (reset),
        .csr_req_if     (csr_req_if),
        .csr_wb_if      (csr_wb_if),
        .csr_io_req_if  (csr_io_req_if),
        .csr_io_rsp_if  (csr_io_rsp_if),
        .notify_commit  (notify_commit)
    );

    VX_mul_unit #(
        .CORE_ID(CORE_ID)
    ) mul_unit (
        .clk            (clk),
        .reset          (reset),
        .mul_req_if     (mul_req_if),
        .mul_wb_if      (mul_wb_if)
    );

    // The GPU unit is connected without clk/reset.
    VX_gpu_unit #(
        .CORE_ID(CORE_ID)
    ) gpu_unit (
        .gpu_req_if     (gpu_req_if),
        .warp_ctl_if    (warp_ctl_if)
    );

    // ---- ebreak detection ----
    wire br_req_valid = (| branch_req_if.valid);
    wire br_is_ebreak = (branch_req_if.br_op == `BR_EBREAK);
    wire br_is_ecall  = (branch_req_if.br_op == `BR_ECALL);

    assign ebreak = br_req_valid && (br_is_ebreak || br_is_ecall);

    // ---- scope (debug capture) taps ----
    // NOTE(review): decode_if is not declared among this module's ports or
    // locals; presumably these lines only elaborate when `SCOPE_ASSIGN
    // expands to nothing - TODO confirm and retarget if scope is enabled.
    `SCOPE_ASSIGN(scope_decode_valid,       decode_if.valid);
    `SCOPE_ASSIGN(scope_decode_warp_num,    decode_if.warp_num);
    `SCOPE_ASSIGN(scope_decode_curr_PC,     decode_if.curr_PC);
    `SCOPE_ASSIGN(scope_decode_is_jal,      decode_if.is_jal);
    `SCOPE_ASSIGN(scope_decode_rs1,         decode_if.rs1);
    `SCOPE_ASSIGN(scope_decode_rs2,         decode_if.rs2);

    `SCOPE_ASSIGN(scope_execute_valid,      alu_req_if.valid);
    `SCOPE_ASSIGN(scope_execute_warp_num,   alu_req_if.warp_num);
    `SCOPE_ASSIGN(scope_execute_curr_PC,    alu_req_if.curr_PC);
    `SCOPE_ASSIGN(scope_execute_rd,         alu_req_if.rd);
    `SCOPE_ASSIGN(scope_execute_a,          alu_req_if.rs1_data);
    `SCOPE_ASSIGN(scope_execute_b,          alu_req_if.rs2_data);

    `SCOPE_ASSIGN(scope_writeback_valid,    writeback_if.valid);
    `SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num);
    `SCOPE_ASSIGN(scope_writeback_curr_PC,  writeback_if.curr_PC);
    `SCOPE_ASSIGN(scope_writeback_wb,       writeback_if.wb);
    `SCOPE_ASSIGN(scope_writeback_rd,       writeback_if.rd);
    `SCOPE_ASSIGN(scope_writeback_data,     writeback_if.data);

endmodule
|
|
@ -1,99 +1,56 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire icache_stage_response,
|
||||
output wire busy,
|
||||
VX_jal_rsp_if jal_rsp_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_warp_ctl_if warp_ctl_if
|
||||
module VX_fetch #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_cache_core_req_if icache_req_if,
|
||||
VX_cache_core_rsp_if icache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
// outputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
|
||||
output wire busy
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
VX_ifetch_req_if ifetch_req_if();
|
||||
|
||||
wire pipe_stall = schedule_delay || icache_stage_delay;
|
||||
|
||||
VX_warp_sched warp_sched (
|
||||
VX_warp_sched #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) warp_sched (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
.is_barrier (warp_ctl_if.is_barrier),
|
||||
.barrier_id (warp_ctl_if.barrier_id),
|
||||
.num_warps (warp_ctl_if.num_warps),
|
||||
.barrier_warp_num (warp_ctl_if.warp_num),
|
||||
VX_icache_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) icache_stage (
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
|
||||
// Wspawn
|
||||
.wspawn (warp_ctl_if.wspawn),
|
||||
.wsapwn_pc (warp_ctl_if.wspawn_pc),
|
||||
.wspawn_new_active(warp_ctl_if.wspawn_new_active),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// CTM
|
||||
.ctm (warp_ctl_if.change_mask),
|
||||
.ctm_mask (warp_ctl_if.thread_mask),
|
||||
.ctm_warp_num (warp_ctl_if.warp_num),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if),
|
||||
|
||||
// WHALT
|
||||
.whalt (warp_ctl_if.whalt),
|
||||
.whalt_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// Wstall
|
||||
.wstall (wstall_if.wstall),
|
||||
.wstall_warp_num (wstall_if.warp_num),
|
||||
|
||||
// Lock/release Stuff
|
||||
.icache_stage_response(icache_stage_response),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
|
||||
// Join
|
||||
.is_join (join_if.is_join),
|
||||
.join_warp_num (join_if.warp_num),
|
||||
|
||||
// Split
|
||||
.is_split (warp_ctl_if.is_split),
|
||||
.dont_split (warp_ctl_if.dont_split),
|
||||
.split_new_mask (warp_ctl_if.split_new_mask),
|
||||
.split_later_mask (warp_ctl_if.split_later_mask),
|
||||
.split_save_pc (warp_ctl_if.split_save_pc),
|
||||
.split_warp_num (warp_ctl_if.warp_num),
|
||||
|
||||
// JAL
|
||||
.jal (jal_rsp_if.valid),
|
||||
.dest (jal_rsp_if.dest),
|
||||
.jal_warp_num (jal_rsp_if.warp_num),
|
||||
|
||||
// Branch
|
||||
.branch_valid (branch_rsp_if.valid),
|
||||
.branch_dir (branch_rsp_if.dir),
|
||||
.branch_dest (branch_rsp_if.dest),
|
||||
.branch_warp_num (branch_rsp_if.warp_num),
|
||||
|
||||
// Outputs
|
||||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.busy (busy),
|
||||
.scheduled_warp (scheduled_warp)
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if)
|
||||
);
|
||||
|
||||
assign fe_inst_meta_fi.warp_num = warp_num;
|
||||
assign fe_inst_meta_fi.valid = thread_mask;
|
||||
assign fe_inst_meta_fi.instruction = 32'h0;
|
||||
assign fe_inst_meta_fi.curr_PC = warp_pc;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
|
||||
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
|
||||
`DEBUG_END
|
||||
|
||||
endmodule
|
|
@ -1,116 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Front end: fetch -> f_d_reg -> icache stage -> i_d_reg -> decode -> d_e_reg.
//
// The three VX_generic_register instances are the pipeline registers between
// those blocks. f_d_reg stalls on icache_stage_delay; i_d_reg and d_e_reg
// stall on total_freeze, which is schedule_delay. None of them is ever
// flushed (flush is tied low).
module VX_front_end #(
    parameter CORE_ID = 0
) (
    `SCOPE_SIGNALS_ISTAGE_IO

    input wire              clk,
    input wire              reset,

    input wire              schedule_delay,

    VX_warp_ctl_if          warp_ctl_if,

    VX_cache_core_rsp_if    icache_rsp_if,
    VX_cache_core_req_if    icache_req_if,

    VX_jal_rsp_if           jal_rsp_if,
    VX_branch_rsp_if        branch_rsp_if,

    VX_backend_req_if       bckE_req_if,
    output wire             busy
);

    // Instruction metadata at each point of the pipeline.
    VX_inst_meta_if     fe_inst_meta_fi();      // fetch output
    VX_inst_meta_if     fe_inst_meta_fi2();     // f_d_reg output / icache stage input
    VX_inst_meta_if     fe_inst_meta_id();      // icache stage output
    VX_inst_meta_if     fd_inst_meta_de();      // i_d_reg output / decode input

    VX_backend_req_if   frE_to_bckE_req_if();   // decode output

    // Decode -> fetch feedback channels.
    VX_wstall_if        wstall_if();
    VX_join_if          join_if();

    wire                total_freeze = schedule_delay;
    wire                icache_stage_delay;
    wire [`NW_BITS-1:0] icache_stage_wid;
    wire                icache_stage_response;

    VX_fetch fetch (
        .clk                    (clk),
        .reset                  (reset),
        .schedule_delay         (schedule_delay),
        .icache_stage_delay     (icache_stage_delay),
        .icache_stage_wid       (icache_stage_wid),
        .icache_stage_response  (icache_stage_response),
        .wstall_if              (wstall_if),
        .join_if                (join_if),
        .warp_ctl_if            (warp_ctl_if),
        .jal_rsp_if             (jal_rsp_if),
        .branch_rsp_if          (branch_rsp_if),
        .fe_inst_meta_fi        (fe_inst_meta_fi),
        .busy                   (busy)
    );

    // fetch -> icache stage register
    VX_generic_register #(
        .N(64+`NW_BITS-1+1+`NUM_THREADS)
    ) f_d_reg (
        .clk   (clk),
        .reset (reset),
        .stall (icache_stage_delay),
        .flush (1'b0),
        .in    ({fe_inst_meta_fi.instruction,
                 fe_inst_meta_fi.curr_PC,
                 fe_inst_meta_fi.warp_num,
                 fe_inst_meta_fi.valid}),
        .out   ({fe_inst_meta_fi2.instruction,
                 fe_inst_meta_fi2.curr_PC,
                 fe_inst_meta_fi2.warp_num,
                 fe_inst_meta_fi2.valid})
    );

    VX_icache_stage #(
        .CORE_ID(CORE_ID)
    ) icache_stage (
        `SCOPE_SIGNALS_ISTAGE_BIND

        .clk                    (clk),
        .reset                  (reset),
        .total_freeze           (total_freeze),
        .icache_stage_delay     (icache_stage_delay),
        .icache_stage_response  (icache_stage_response),
        .icache_stage_wid       (icache_stage_wid),
        .fe_inst_meta_fi        (fe_inst_meta_fi2),
        .fe_inst_meta_id        (fe_inst_meta_id),
        .icache_rsp_if          (icache_rsp_if),
        .icache_req_if          (icache_req_if)
    );

    // icache stage -> decode register
    VX_generic_register #(
        .N(64 + `NW_BITS-1 + 1 + `NUM_THREADS)
    ) i_d_reg (
        .clk   (clk),
        .reset (reset),
        .stall (total_freeze),
        .flush (1'b0),
        .in    ({fe_inst_meta_id.instruction,
                 fe_inst_meta_id.curr_PC,
                 fe_inst_meta_id.warp_num,
                 fe_inst_meta_id.valid}),
        .out   ({fd_inst_meta_de.instruction,
                 fd_inst_meta_de.curr_PC,
                 fd_inst_meta_de.warp_num,
                 fd_inst_meta_de.valid})
    );

    VX_decode decode (
        .fd_inst_meta_de    (fd_inst_meta_de),
        .frE_to_bckE_req_if (frE_to_bckE_req_if),
        .wstall_if          (wstall_if),
        .join_if            (join_if)
    );

    // decode -> back-end register. The field order of .in and .out must stay
    // identical: the register only sees one flat N-bit vector.
    VX_generic_register #(
        .N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)
    ) d_e_reg (
        .clk   (clk),
        .reset (reset),
        .stall (total_freeze),
        .flush (1'b0),
        .in    ({frE_to_bckE_req_if.csr_addr,
                 frE_to_bckE_req_if.is_jal,
                 frE_to_bckE_req_if.is_etype,
                 frE_to_bckE_req_if.is_csr,
                 frE_to_bckE_req_if.csr_immed,
                 frE_to_bckE_req_if.csr_mask,
                 frE_to_bckE_req_if.rd,
                 frE_to_bckE_req_if.rs1,
                 frE_to_bckE_req_if.rs2,
                 frE_to_bckE_req_if.alu_op,
                 frE_to_bckE_req_if.wb,
                 frE_to_bckE_req_if.rs2_src,
                 frE_to_bckE_req_if.itype_immed,
                 frE_to_bckE_req_if.mem_read,
                 frE_to_bckE_req_if.mem_write,
                 frE_to_bckE_req_if.branch_type,
                 frE_to_bckE_req_if.upper_immed,
                 frE_to_bckE_req_if.curr_PC,
                 frE_to_bckE_req_if.jal,
                 frE_to_bckE_req_if.jal_offset,
                 frE_to_bckE_req_if.next_PC,
                 frE_to_bckE_req_if.valid,
                 frE_to_bckE_req_if.warp_num,
                 frE_to_bckE_req_if.is_wspawn,
                 frE_to_bckE_req_if.is_tmc,
                 frE_to_bckE_req_if.is_split,
                 frE_to_bckE_req_if.is_barrier}),
        .out   ({bckE_req_if.csr_addr,
                 bckE_req_if.is_jal,
                 bckE_req_if.is_etype,
                 bckE_req_if.is_csr,
                 bckE_req_if.csr_immed,
                 bckE_req_if.csr_mask,
                 bckE_req_if.rd,
                 bckE_req_if.rs1,
                 bckE_req_if.rs2,
                 bckE_req_if.alu_op,
                 bckE_req_if.wb,
                 bckE_req_if.rs2_src,
                 bckE_req_if.itype_immed,
                 bckE_req_if.mem_read,
                 bckE_req_if.mem_write,
                 bckE_req_if.branch_type,
                 bckE_req_if.upper_immed,
                 bckE_req_if.curr_PC,
                 bckE_req_if.jal,
                 bckE_req_if.jal_offset,
                 bckE_req_if.next_PC,
                 bckE_req_if.valid,
                 bckE_req_if.warp_num,
                 bckE_req_if.is_wspawn,
                 bckE_req_if.is_tmc,
                 bckE_req_if.is_split,
                 bckE_req_if.is_barrier})
    );

endmodule
|
||||
|
||||
|
88
hw/rtl/VX_gpr_mux.v
Normal file
88
hw/rtl/VX_gpr_mux.v
Normal file
|
@ -0,0 +1,88 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Request demultiplexer: fans the instruction carried by execute_if, together
// with its register operands, out to the six functional-unit request
// interfaces.
//
// Payload fields are driven on every interface unconditionally; only the
// per-thread `valid` mask is gated, so that it is non-zero solely on the
// interface whose unit matches execute_if.ex_type.
module VX_gpr_mux (
    // inputs
    VX_execute_if       execute_if,
    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,

    // outputs
    VX_alu_req_if       alu_req_if,
    VX_branch_req_if    branch_req_if,
    VX_lsu_req_if       lsu_req_if,
    VX_csr_req_if       csr_req_if,
    VX_mul_req_if       mul_req_if,
    VX_gpu_req_if       gpu_req_if
);

    // One select bit per functional unit, replicated across threads where
    // it gates the valid mask.
    wire sel_alu = (execute_if.ex_type == `EX_ALU);
    wire sel_br  = (execute_if.ex_type == `EX_BR);
    wire sel_lsu = (execute_if.ex_type == `EX_LSU);
    wire sel_csr = (execute_if.ex_type == `EX_CSR);
    wire sel_mul = (execute_if.ex_type == `EX_MUL);
    wire sel_gpu = (execute_if.ex_type == `EX_GPU);

    // ALU unit
    assign alu_req_if.valid     = execute_if.valid & {`NUM_THREADS{sel_alu}};
    assign alu_req_if.warp_num  = execute_if.warp_num;
    assign alu_req_if.curr_PC   = execute_if.curr_PC;
    assign alu_req_if.alu_op    = `ALU_OP(execute_if.instr_op);
    assign alu_req_if.rs1_data  = rs1_data;
    assign alu_req_if.rs2_data  = rs2_data;
    assign alu_req_if.rd        = execute_if.rd;
    assign alu_req_if.wb        = execute_if.wb;

    // BR unit
    assign branch_req_if.valid    = execute_if.valid & {`NUM_THREADS{sel_br}};
    assign branch_req_if.warp_num = execute_if.warp_num;
    assign branch_req_if.curr_PC  = execute_if.curr_PC;
    assign branch_req_if.next_PC  = execute_if.next_PC;
    assign branch_req_if.br_op    = `BR_OP(execute_if.instr_op);
    assign branch_req_if.offset   = execute_if.imm;
    assign branch_req_if.rs1_data = rs1_data;
    assign branch_req_if.rs2_data = rs2_data;
    assign branch_req_if.rd       = execute_if.rd;
    assign branch_req_if.wb       = execute_if.wb;

    // LSU unit
    assign lsu_req_if.valid      = execute_if.valid & {`NUM_THREADS{sel_lsu}};
    assign lsu_req_if.warp_num   = execute_if.warp_num;
    assign lsu_req_if.curr_PC    = execute_if.curr_PC;
    assign lsu_req_if.rw         = `LSU_RW(execute_if.instr_op);
    assign lsu_req_if.byteen     = `LSU_BE(execute_if.instr_op);
    assign lsu_req_if.base_addr  = rs1_data;
    assign lsu_req_if.offset     = execute_if.imm;
    assign lsu_req_if.store_data = rs2_data;
    assign lsu_req_if.rd         = execute_if.rd;
    assign lsu_req_if.wb         = execute_if.wb;

    // CSR unit
    // The CSR address is taken from the low `CSR_ADDR_SIZE bits of imm. The
    // mask is the rs1 field itself (resized to 32 bits) when rs2_is_imm is
    // set, otherwise the rs1 register value of thread 0.
    assign csr_req_if.valid     = execute_if.valid & {`NUM_THREADS{sel_csr}};
    assign csr_req_if.warp_num  = execute_if.warp_num;
    assign csr_req_if.curr_PC   = execute_if.curr_PC;
    assign csr_req_if.csr_op    = `CSR_OP(execute_if.instr_op);
    assign csr_req_if.csr_addr  = execute_if.imm[`CSR_ADDR_SIZE-1:0];
    assign csr_req_if.csr_mask  = execute_if.rs2_is_imm ? 32'(execute_if.rs1) : rs1_data[0];
    assign csr_req_if.rd        = execute_if.rd;
    assign csr_req_if.wb        = execute_if.wb;
    assign csr_req_if.is_io     = 1'b0;

    // MUL unit
    assign mul_req_if.valid     = execute_if.valid & {`NUM_THREADS{sel_mul}};
    assign mul_req_if.warp_num  = execute_if.warp_num;
    assign mul_req_if.curr_PC   = execute_if.curr_PC;
    assign mul_req_if.mul_op    = `MUL_OP(execute_if.instr_op);
    assign mul_req_if.rs1_data  = rs1_data;
    assign mul_req_if.rs2_data  = rs2_data;
    assign mul_req_if.rd        = execute_if.rd;
    assign mul_req_if.wb        = execute_if.wb;

    // GPU unit
    // Receives every thread's rs1 value but only thread 0's rs2 value.
    assign gpu_req_if.valid     = execute_if.valid & {`NUM_THREADS{sel_gpu}};
    assign gpu_req_if.warp_num  = execute_if.warp_num;
    assign gpu_req_if.next_PC   = execute_if.next_PC;
    assign gpu_req_if.gpu_op    = `GPU_OP(execute_if.instr_op);
    assign gpu_req_if.rs1_data  = rs1_data;
    assign gpu_req_if.rs2_data  = rs2_data[0];

endmodule
|
|
@ -1,81 +1,75 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_ram (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire write_ce,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
VX_wb_if writeback_if,
|
||||
input wire clk,
|
||||
input wire [`NUM_THREADS-1:0] we,
|
||||
input wire [`NR_BITS-1:0] waddr,
|
||||
input wire [`NUM_THREADS-1:0][31:0] wdata,
|
||||
input wire [`NR_BITS-1:0] rs1,
|
||||
input wire [`NR_BITS-1:0] rs2,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs2_data
|
||||
);
|
||||
`ifndef ASIC
|
||||
|
||||
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
|
||||
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_unqual;
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_unqual;
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram [31:0];
|
||||
|
||||
assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_unqual : 0;
|
||||
assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_unqual : 0;
|
||||
integer i;
|
||||
|
||||
wire [`NUM_THREADS-1:0] write_enable = writeback_if.valid & {`NUM_THREADS{write_ce && (writeback_if.wb != 0)}};
|
||||
|
||||
`ifndef ASIC
|
||||
`UNUSED_VAR(reset)
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram[31:0];
|
||||
|
||||
wire [4:0] waddr = writeback_if.rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wdata = writeback_if.data;
|
||||
initial begin
    // Initialize r0 to 0 for every thread.
    // `ram` is indexed as ram[register][thread][byte] (the write path uses
    // ram[waddr][i][byte]), so the register index must be the constant 0
    // and the loop variable must select the thread. The byte index then
    // covers all four bytes of the 32-bit word.
    for (i = 0; i < `NUM_THREADS; i++) begin
        ram[0][i][0] = 0;
        ram[0][i][1] = 0;
        ram[0][i][2] = 0;
        ram[0][i][3] = 0;
    end
end
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(posedge clk) begin
|
||||
if (write_enable[i]) begin
|
||||
always @(posedge clk) begin
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
ram[waddr][i][0] <= wdata[i][07:00];
|
||||
ram[waddr][i][1] <= wdata[i][15:08];
|
||||
ram[waddr][i][2] <= wdata[i][23:16];
|
||||
ram[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
assert(~(|we) || (waddr != 0)); // ensure r0 is never written!
|
||||
end
|
||||
|
||||
assign a_reg_data_unqual = ram[gpr_read_if.rs1];
|
||||
assign b_reg_data_unqual = ram[gpr_read_if.rs2];
|
||||
assign rs1_data = ram[rs1];
|
||||
assign rs2_data = ram[rs2];
|
||||
|
||||
`else
|
||||
|
||||
wire going_to_write = write_enable & (| writeback_if.wb_valid);
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
||||
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
|
||||
genvar i;
|
||||
integer i;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire local_write = write_enable & writeback_if.wb_valid[i];
|
||||
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
|
||||
assign write_bit_mask[i] = {32{~we[i]}};
|
||||
end
|
||||
|
||||
wire cenb = 0;
|
||||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_a;
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_b;
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_a;
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
genvar j;
|
||||
integer j;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (j = 0; j < `NUM_GPRS; j++) begin
|
||||
assign a_reg_data_unqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||
assign b_reg_data_unqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||
for (j = 0; j < 32; j++) begin
|
||||
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||
end
|
||||
end
|
||||
`else
|
||||
assign a_reg_data_unqual = tmp_a;
|
||||
assign b_reg_data_unqual = tmp_b;
|
||||
assign rs1_data = tmp_a;
|
||||
assign rs2_data = tmp_b;
|
||||
`endif
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data;
|
||||
|
||||
for (i = 0; i < 'NT; i=i+4)
|
||||
begin
|
||||
for (i = 0; i < 'NT; i=i+4) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
|
@ -88,12 +82,12 @@ module VX_gpr_ram (
|
|||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(gpr_read_if.rs1[(i+3):(i)]),
|
||||
.AA(rs1[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(writeback_if.rd[(i+3):(i)]),
|
||||
.DB(to_write[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
|
@ -125,12 +119,12 @@ module VX_gpr_ram (
|
|||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(gpr_read_if.rs2[(i+3):(i)]),
|
||||
.AA(rs2[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(writeback_if.rd[(i+3):(i)]),
|
||||
.DB(to_write[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
|
|
|
@ -1,226 +1,172 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_stage (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire schedule_delay,
|
||||
module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire stall_gpr_csr,
|
||||
output wire delay,
|
||||
// inputs
|
||||
VX_execute_if execute_if,
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
// decodee inputs
|
||||
VX_backend_req_if bckE_req_if,
|
||||
|
||||
// WriteBack inputs
|
||||
VX_wb_if writeback_if,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if exec_unit_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_gpu_inst_req_if gpu_inst_req_if,
|
||||
VX_csr_req_if csr_req_if
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_branch_req_if branch_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
wire[31:0] curr_PC = bckE_req_if.curr_PC;
|
||||
wire[2:0] branchType = bckE_req_if.branch_type;
|
||||
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
|
||||
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
|
||||
wire is_jal = bckE_req_if.is_jal;
|
||||
`DEBUG_END
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_imm;
|
||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||
|
||||
assign csr_req_if.is_io = 1'b0; // GPR only issues csr requests coming from core
|
||||
genvar i;
|
||||
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.rs1 = bckE_req_if.rs1;
|
||||
assign gpr_read_if.rs2 = bckE_req_if.rs2;
|
||||
assign gpr_read_if.warp_num = bckE_req_if.warp_num;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign rs1_PC[i] = execute_if.curr_PC;
|
||||
assign rs2_imm[i] = execute_if.imm;
|
||||
end
|
||||
|
||||
`ifndef ASIC
|
||||
assign gpr_read_if.is_jal = bckE_req_if.is_jal;
|
||||
assign gpr_read_if.curr_PC = bckE_req_if.curr_PC;
|
||||
`else
|
||||
assign gpr_read_if.is_jal = exec_unit_req_if.is_jal;
|
||||
assign gpr_read_if.curr_PC = exec_unit_req_if.curr_PC;
|
||||
`endif
|
||||
assign rs1_data = execute_if.rs1_is_PC ? rs1_PC : rs1_data_all[execute_if.warp_num];
|
||||
assign rs2_data = execute_if.rs2_is_imm ? rs2_imm : rs2_data_all[execute_if.warp_num];
|
||||
|
||||
VX_gpr_wrapper grp_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_read_if (gpr_read_if)
|
||||
);
|
||||
generate
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(writeback_if.wb != 0) && (i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (execute_if.rs1),
|
||||
.rs2 (execute_if.rs2),
|
||||
.rs1_data (rs1_data_all[i]),
|
||||
.rs2_data (rs2_data_all[i])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if exec_unit_req_temp_if();
|
||||
VX_lsu_req_if lsu_req_temp_if();
|
||||
VX_gpu_inst_req_if gpu_inst_req_temp_if();
|
||||
VX_csr_req_if csr_req_temp_if();
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_branch_req_if branch_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_inst_multiplex inst_mult(
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.gpr_read_if (gpr_read_if),
|
||||
.exec_unit_req_if (exec_unit_req_temp_if),
|
||||
.lsu_req_if (lsu_req_temp_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_temp_if),
|
||||
.csr_req_if (csr_req_temp_if)
|
||||
);
|
||||
VX_gpr_mux gpr_mux (
|
||||
.execute_if (execute_if),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data),
|
||||
.alu_req_if (alu_req_if),
|
||||
.branch_req_if (branch_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire is_lsu = (| lsu_req_temp_if.valid);
|
||||
`DEBUG_END
|
||||
wire stall_rest = 0;
|
||||
wire flush_rest = schedule_delay;
|
||||
wire stall_alu = ~alu_req_if.ready && (| alu_req_if.valid);
|
||||
wire stall_br = ~branch_req_if.ready && (| branch_req_if.valid);
|
||||
wire stall_lsu = ~lsu_req_if.ready && (| lsu_req_if.valid);
|
||||
wire stall_csr = ~csr_req_if.ready && (| csr_req_if.valid);
|
||||
wire stall_mul = ~mul_req_if.ready && (| mul_req_if.valid);
|
||||
wire stall_gpu = ~gpu_req_if.ready && (| gpu_req_if.valid);
|
||||
|
||||
wire stall_lsu = memory_delay;
|
||||
wire flush_lsu = schedule_delay && !stall_lsu;
|
||||
|
||||
wire stall_exec = exec_delay;
|
||||
wire flush_exec = schedule_delay && !stall_exec;
|
||||
|
||||
wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (| bckE_req_if.valid);
|
||||
|
||||
assign delay = stall_lsu || stall_exec || stall_csr;
|
||||
|
||||
`ifdef ASIC
|
||||
wire delayed_lsu_last_cycle;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1)
|
||||
) delayed_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_rest),
|
||||
.flush (stall_rest),
|
||||
.in (stall_lsu),
|
||||
.out (delayed_lsu_last_cycle),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] temp_store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] temp_base_addr; // A reg data
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] real_store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] real_base_addr; // A reg data
|
||||
|
||||
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS*32*2)
|
||||
) lsu_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (!store_curr_real),
|
||||
.flush (stall_rest),
|
||||
.in ({real_store_data, real_base_addr}),
|
||||
.out ({temp_store_data, temp_base_addr})
|
||||
);
|
||||
|
||||
assign real_store_data = lsu_req_temp_if.store_data;
|
||||
assign real_base_addr = lsu_req_temp_if.base_addr;
|
||||
|
||||
assign lsu_req_if.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
|
||||
assign lsu_req_if.base_addr = (delayed_lsu_last_cycle) ? temp_base_addr : real_base_addr;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (flush_lsu),
|
||||
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
|
||||
.out ({lsu_req_if.valid , lsu_req_if.curr_PC ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) exec_unit_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_exec),
|
||||
.flush (flush_exec),
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_addr, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_addr , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
assign exec_unit_req_if.a_reg_data = real_base_addr;
|
||||
assign exec_unit_req_if.b_reg_data = real_store_data;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
|
||||
) gpu_inst_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_rest),
|
||||
.flush (flush_rest),
|
||||
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC}),
|
||||
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC })
|
||||
);
|
||||
|
||||
assign gpu_inst_req_if.a_reg_data = real_base_addr;
|
||||
assign gpu_inst_req_if.rd2 = real_store_data;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpr_csr),
|
||||
.flush (flush_rest),
|
||||
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_addr, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
|
||||
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_addr , csr_req_if.csr_immed , csr_req_if.csr_mask })
|
||||
);
|
||||
|
||||
|
||||
`else
|
||||
|
||||
// 341
|
||||
VX_generic_register #(
|
||||
.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `NR_BITS + `WB_BITS)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_alu),
|
||||
.flush (0),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.rd, alu_req_tmp_if.wb}),
|
||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.rd, alu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + 32 + `BR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + `NR_BITS + `WB_BITS)
|
||||
) br_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_br),
|
||||
.flush (0),
|
||||
.in ({branch_req_tmp_if.valid, branch_req_tmp_if.warp_num, branch_req_tmp_if.curr_PC, branch_req_tmp_if.next_PC, branch_req_tmp_if.br_op, branch_req_tmp_if.rs1_data, branch_req_tmp_if.rs2_data, branch_req_tmp_if.offset, branch_req_tmp_if.rd, branch_req_tmp_if.wb}),
|
||||
.out ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.next_PC, branch_req_if.br_op, branch_req_if.rs1_data, branch_req_if.rs2_data, branch_req_if.offset, branch_req_if.rd, branch_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 1 + `BYTEEN_BITS + `NR_BITS + `WB_BITS)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (flush_lsu),
|
||||
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_addr, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
|
||||
.out ({lsu_req_if.valid , lsu_req_if.curr_PC , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_addr , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
|
||||
.flush (0),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.store_data, lsu_req_tmp_if.offset, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.base_addr, lsu_req_if.store_data, lsu_req_if.offset, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.rd, lsu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
|
||||
) exec_unit_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_exec),
|
||||
.flush (flush_exec),
|
||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_addr, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_addr , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))
|
||||
) gpu_inst_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_rest),
|
||||
.flush (flush_rest),
|
||||
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
|
||||
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `CSR_ADDR_SIZE + 32 + 1 + `NR_BITS + `WB_BITS)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpr_csr),
|
||||
.flush (flush_rest),
|
||||
.in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_addr, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}),
|
||||
.out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_addr , csr_req_if.csr_immed , csr_req_if.csr_mask })
|
||||
.stall (stall_csr),
|
||||
.flush (0),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io, csr_req_tmp_if.rd, csr_req_tmp_if.wb}),
|
||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io, csr_req_if.rd, csr_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `NR_BITS + `WB_BITS)
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_mul),
|
||||
.flush (0),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data, mul_req_tmp_if.rd, mul_req_tmp_if.wb}),
|
||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.rs1_data, mul_req_if.rs2_data, mul_req_if.rd, mul_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpu),
|
||||
.flush (0),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.next_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.next_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data})
|
||||
);
|
||||
|
||||
assign execute_if.alu_ready = ~stall_alu;
|
||||
assign execute_if.br_ready = ~stall_br;
|
||||
assign execute_if.lsu_ready = ~stall_lsu;
|
||||
assign execute_if.csr_ready = ~stall_csr;
|
||||
assign execute_if.mul_ready = ~stall_mul;
|
||||
assign execute_if.gpu_ready = ~stall_gpu;
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| execute_if.valid)) begin
|
||||
$display("%t: Core%0d-GPR: warp=%0d, PC=%0h, a=%0h, b=%0h", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, rs1_data, rs2_data);
|
||||
|
||||
// the scheduler guarantees that the destination execute unit is ready
|
||||
assert((execute_if.ex_type != `EX_ALU) || alu_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_BR) || branch_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_LSU) || lsu_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_CSR) || csr_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_MUL) || mul_req_if.ready);
|
||||
assert((execute_if.ex_type != `EX_GPU) || gpu_req_if.ready);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule : VX_gpr_stage
|
||||
endmodule
|
||||
|
|
|
@ -1,60 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Register-file wrapper: instantiates one VX_gpr_ram per warp and drives the
// operand pair (a_reg_data / b_reg_data) back onto gpr_read_if.
//
// Operand A is replaced by curr_PC (replicated for every thread) whenever
// gpr_read_if.is_jal is set; operand B always comes from the register file.
module VX_gpr_wrapper (
    input wire      clk,
    input wire      reset,
    VX_wb_if        writeback_if,   // write-back request, forwarded to every per-warp RAM
    VX_gpr_read_if  gpr_read_if     // read request in; a_reg_data / b_reg_data out
);
    // Read data of every warp's RAM; one entry is selected below.
    wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_a_reg_data;
    wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_b_reg_data;

    // curr_PC broadcast to every thread lane (operand A for JAL).
    wire [`NUM_THREADS-1:0][31:0] jal_data;

    genvar i;
    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign
            assign jal_data[i] = gpr_read_if.curr_PC;
        end
    endgenerate

`ifndef ASIC
    // Select the requesting warp's read data directly.
    assign gpr_read_if.a_reg_data = gpr_read_if.is_jal ? jal_data : tmp_a_reg_data[gpr_read_if.warp_num];
    assign gpr_read_if.b_reg_data = tmp_b_reg_data[gpr_read_if.warp_num];
`else

    // Registered copy of the requested warp number; the read data is selected
    // with this delayed value (presumably to line up with the latency of the
    // ASIC RAM macro -- TODO confirm against VX_gpr_ram).
    wire [`NW_BITS-1:0] old_warp_num;

    VX_generic_register #(
        .N(`NW_BITS-1+1)
    ) store_wn (
        .clk   (clk),
        .reset (reset),
        .stall (1'b0),
        .flush (1'b0),
        .in    (gpr_read_if.warp_num),
        .out   (old_warp_num)
    );

    // Fix: this branch used to read `gpr_jal_if.is_jal`, but no `gpr_jal_if`
    // exists in this module; the JAL flag arrives on gpr_read_if, exactly as
    // in the non-ASIC branch above.
    assign gpr_read_if.a_reg_data = gpr_read_if.is_jal ? jal_data : tmp_a_reg_data[old_warp_num];
    assign gpr_read_if.b_reg_data = tmp_b_reg_data[old_warp_num];

`endif

    generate
        for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs
            // Only the RAM belonging to the write-back warp is write-enabled.
            wire write_ce = (i == writeback_if.warp_num);

            VX_gpr_ram gpr_ram(
                .clk          (clk),
                .reset        (reset),
                .write_ce     (write_ce),
                .gpr_read_if  (gpr_read_if),
                .writeback_if (writeback_if),
                .a_reg_data   (tmp_a_reg_data[i]),
                .b_reg_data   (tmp_b_reg_data[i])
            );
        end
    endgenerate

endmodule
|
||||
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Combinational decode of a GPU control request (tmc / wspawn / split /
// barrier) into the warp-control signals carried by warp_ctl_if.
module VX_gpu_inst (
    // Input
    VX_gpu_inst_req_if  gpu_inst_req_if,

    // Output
    VX_warp_ctl_if      warp_ctl_if
);
    genvar i;

    // Per-thread valid bits of the request; the request is live when any is set.
    wire [`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
    wire                    valid_inst  = (| curr_valids);

    assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;

    // ---------------------------------------------------------------- TMC
    // Thread i stays enabled when i < a_reg_data[0]; a count larger than
    // NUM_THREADS enables every thread.
    wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
    wire [`NUM_THREADS-1:0] tmc_new_mask;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
            assign tmc_new_mask[i] = all_threads || (i < gpu_inst_req_if.a_reg_data[0]);
        end
    endgenerate

    assign warp_ctl_if.change_mask = gpu_inst_req_if.is_tmc && valid_inst;
    assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : {`NUM_THREADS{1'b0}};

    // A mask change that leaves no thread enabled halts the warp.
    assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);

    // ------------------------------------------------------------- WSPAWN
    // Warp i becomes active when i < a_reg_data[0]; a count larger than
    // NUM_WARPS activates every warp.
    wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
    wire [`NUM_WARPS-1:0] wspawn_new_active;

    generate
        for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
            assign wspawn_new_active[i] = all_active || (i < gpu_inst_req_if.a_reg_data[0]);
        end
    endgenerate

`DEBUG_BEGIN
    wire [31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
`DEBUG_END

    assign warp_ctl_if.num_warps         = num_warps_m1[$clog2(`NUM_WARPS):0];
    assign warp_ctl_if.wspawn            = gpu_inst_req_if.is_wspawn && valid_inst;
    assign warp_ctl_if.wspawn_pc         = gpu_inst_req_if.rd2;
    assign warp_ctl_if.wspawn_new_active = wspawn_new_active;

    // ------------------------------------------------------------ BARRIER
    assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
    assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];

    // -------------------------------------------------------------- SPLIT
    // Valid threads whose a_reg_data equals 1 form the "use" mask; the
    // remaining valid threads form the "later" mask.
    wire [`NUM_THREADS-1:0] split_new_use_mask;
    wire [`NUM_THREADS-1:0] split_new_later_mask;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
            wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
            assign split_new_use_mask[i]   = curr_valids[i] &  curr_bool;
            assign split_new_later_mask[i] = curr_valids[i] & !curr_bool;
        end
    endgenerate

    // Number of valid threads in the request.
    wire [$clog2(`NUM_THREADS):0] num_valids;

    VX_countones #(
        .N(`NUM_THREADS)
    ) valids_counter (
        .valids (curr_valids),
        .count  (num_valids)
    );

    // A split is only reported when more than one thread is valid; it is
    // flagged as "don't split" when the use mask is all zeros or all ones.
    assign warp_ctl_if.is_split         = gpu_inst_req_if.is_split && (num_valids > 1);
    assign warp_ctl_if.dont_split       = warp_ctl_if.is_split && ((~| split_new_use_mask) || (& split_new_use_mask));
    assign warp_ctl_if.split_new_mask   = split_new_use_mask;
    assign warp_ctl_if.split_later_mask = split_new_later_mask;
    assign warp_ctl_if.split_save_pc    = gpu_inst_req_if.next_PC;
    assign warp_ctl_if.split_warp_num   = gpu_inst_req_if.warp_num;

endmodule
|
79
hw/rtl/VX_gpu_unit.v
Normal file
79
hw/rtl/VX_gpu_unit.v
Normal file
|
@ -0,0 +1,79 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// GPU control unit: combinationally turns a gpu_req_if request (wspawn /
// tmc / split / bar, selected by gpu_op) into warp-control signals on
// warp_ctl_if. The unit never back-pressures its requester.
module VX_gpu_unit #(
    parameter CORE_ID = 0
) (
    // Input
    VX_gpu_req_if   gpu_req_if,

    // Output
    VX_warp_ctl_if  warp_ctl_if
);
    genvar i;

    // Per-thread valid bits; the request is live when any thread is valid.
    wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
    wire                    valid_inst  = (| curr_valids);

    // Operation decode.
    wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
    wire is_tmc    = (gpu_req_if.gpu_op == `GPU_TMC);
    wire is_split  = (gpu_req_if.gpu_op == `GPU_SPLIT);
    wire is_bar    = (gpu_req_if.gpu_op == `GPU_BAR);

    assign warp_ctl_if.warp_num = gpu_req_if.warp_num;

    // ---------------------------------------------------------------- TMC
    // Thread i stays enabled when i < rs1_data[0]; a count larger than
    // NUM_THREADS enables every thread.
    wire all_threads = `NUM_THREADS < gpu_req_if.rs1_data[0];
    wire [`NUM_THREADS-1:0] tmc_new_mask;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
            assign tmc_new_mask[i] = all_threads || (i < gpu_req_if.rs1_data[0]);
        end
    endgenerate

    assign warp_ctl_if.change_mask = is_tmc && valid_inst;
    assign warp_ctl_if.thread_mask = is_tmc ? tmc_new_mask : {`NUM_THREADS{1'b0}};

    // A mask change that leaves no thread enabled halts the warp.
    assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);

    // ------------------------------------------------------------- WSPAWN
    // Warp i becomes active when i < rs1_data[0]; a count larger than
    // NUM_WARPS activates every warp.
    wire all_active = `NUM_WARPS < gpu_req_if.rs1_data[0];
    wire [`NUM_WARPS-1:0] wspawn_new_active;

    generate
        for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
            assign wspawn_new_active[i] = all_active || (i < gpu_req_if.rs1_data[0]);
        end
    endgenerate

    assign warp_ctl_if.wspawn            = is_wspawn && valid_inst;
    assign warp_ctl_if.wspawn_pc         = gpu_req_if.rs2_data;
    assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
    assign warp_ctl_if.num_warps         = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);

    // ------------------------------------------------------------ BARRIER
    assign warp_ctl_if.is_barrier = is_bar && valid_inst;
    assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];

    // -------------------------------------------------------------- SPLIT
    // Valid threads whose rs1_data equals 1 form the "use" mask; the
    // remaining valid threads form the "later" mask.
    wire [`NUM_THREADS-1:0] split_new_use_mask;
    wire [`NUM_THREADS-1:0] split_new_later_mask;

    generate
        for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
            wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
            assign split_new_use_mask[i]   = curr_valids[i] &  curr_bool;
            assign split_new_later_mask[i] = curr_valids[i] & !curr_bool;
        end
    endgenerate

    // Number of valid threads in the request.
    wire [`NT_BITS:0] num_valids;

    VX_countones #(
        .N(`NUM_THREADS)
    ) valids_counter (
        .valids (curr_valids),
        .count  (num_valids)
    );

    // is_split requires more than one valid thread; do_split is raised only
    // when the use mask is neither all zeros nor all ones.
    assign warp_ctl_if.is_split         = is_split && (num_valids > 1);
    assign warp_ctl_if.do_split         = (| split_new_use_mask) && !(& split_new_use_mask);
    assign warp_ctl_if.split_new_mask   = split_new_use_mask;
    assign warp_ctl_if.split_later_mask = split_new_later_mask;
    assign warp_ctl_if.split_save_pc    = gpu_req_if.next_PC;

    assign gpu_req_if.ready = 1'b1; // has no stalls

endmodule
|
|
@ -7,20 +7,21 @@ module VX_icache_stage #(
|
|||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire icache_stage_response,
|
||||
VX_inst_meta_if fe_inst_meta_fi,
|
||||
VX_inst_meta_if fe_inst_meta_id,
|
||||
|
||||
// Icache interface
|
||||
VX_cache_core_req_if icache_req_if,
|
||||
VX_cache_core_rsp_if icache_rsp_if
|
||||
VX_cache_core_rsp_if icache_rsp_if,
|
||||
|
||||
// request
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
|
||||
// response
|
||||
VX_ifetch_rsp_if ifetch_rsp_if
|
||||
);
|
||||
|
||||
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (| fe_inst_meta_fi.valid);
|
||||
wire valid_inst = (| ifetch_req_if.valid);
|
||||
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire mrq_full;
|
||||
|
@ -30,25 +31,25 @@ module VX_icache_stage #(
|
|||
|
||||
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
|
||||
|
||||
VX_indexable_queue #(
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
|
||||
.SIZE (`ICREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, fe_inst_meta_fi.curr_PC, fe_inst_meta_fi.warp_num}),
|
||||
.write_data ({mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, fe_inst_meta_id.curr_PC, fe_inst_meta_id.warp_num}),
|
||||
.read_data ({dbg_mrq_write_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mrq_push) begin
|
||||
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
|
||||
valid_threads[ifetch_req_if.warp_num] <= ifetch_req_if.valid;
|
||||
end
|
||||
if (mrq_pop) begin
|
||||
assert(mrq_read_addr == dbg_mrq_write_addr);
|
||||
|
@ -59,29 +60,26 @@ module VX_icache_stage #(
|
|||
assign icache_req_if.valid = valid_inst && !mrq_full;
|
||||
assign icache_req_if.rw = 0;
|
||||
assign icache_req_if.byteen = 4'b1111;
|
||||
assign icache_req_if.addr = fe_inst_meta_fi.curr_PC[31:2];
|
||||
assign icache_req_if.addr = ifetch_req_if.curr_PC[31:2];
|
||||
assign icache_req_if.data = 0;
|
||||
|
||||
// Can't accept new request
|
||||
assign icache_stage_delay = mrq_full || !icache_req_if.ready;
|
||||
assign ifetch_req_if.ready = !mrq_full && icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.tag = {fe_inst_meta_fi.curr_PC, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr};
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 2'b1, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
|
||||
`else
|
||||
assign icache_req_if.tag = mrq_write_addr;
|
||||
`endif
|
||||
|
||||
assign fe_inst_meta_id.instruction = icache_rsp_if.valid ? icache_rsp_if.data[0] : 0;
|
||||
assign fe_inst_meta_id.valid = icache_rsp_if.valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
|
||||
|
||||
assign icache_stage_response = mrq_pop;
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid ? valid_threads[ifetch_rsp_if.warp_num] : 0;
|
||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||
|
||||
// Can't accept new response
|
||||
assign icache_rsp_if.ready = !total_freeze;
|
||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num);
|
||||
`SCOPE_ASSIGN(scope_icache_req_warp_num, ifetch_req_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.tag);
|
||||
`SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.ready);
|
||||
|
@ -94,10 +92,10 @@ module VX_icache_stage #(
|
|||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||
$display("%t: I%0d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.curr_PC, fe_inst_meta_fi.warp_num);
|
||||
$display("%t: I$%0d req: tag=%0h, PC=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num);
|
||||
end
|
||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||
$display("%t: I%0d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, fe_inst_meta_id.curr_PC, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
|
||||
$display("%t: I$%0d rsp: tag=%0h, PC=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num, ifetch_rsp_if.instr);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,90 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_inst_multiplex (
|
||||
// Inputs
|
||||
VX_backend_req_if bckE_req_if,
|
||||
VX_gpr_read_if gpr_read_if,
|
||||
|
||||
// Outputs
|
||||
VX_exec_unit_req_if exec_unit_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_gpu_inst_req_if gpu_inst_req_if,
|
||||
VX_csr_req_if csr_req_if
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] is_mem_mask;
|
||||
wire[`NUM_THREADS-1:0] is_gpu_mask;
|
||||
wire[`NUM_THREADS-1:0] is_csr_mask;
|
||||
|
||||
wire is_mem = (bckE_req_if.mem_write != `BYTE_EN_NO) || (bckE_req_if.mem_read != `BYTE_EN_NO);
|
||||
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
|
||||
wire is_csr = bckE_req_if.is_csr;
|
||||
// wire is_gpu = 0;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin : mask_init
|
||||
assign is_mem_mask[i] = is_mem;
|
||||
assign is_gpu_mask[i] = is_gpu;
|
||||
assign is_csr_mask[i] = is_csr;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// LSU Unit
|
||||
assign lsu_req_if.valid = bckE_req_if.valid & is_mem_mask;
|
||||
assign lsu_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign lsu_req_if.base_addr = gpr_read_if.a_reg_data;
|
||||
assign lsu_req_if.store_data = gpr_read_if.b_reg_data;
|
||||
|
||||
assign lsu_req_if.offset = bckE_req_if.itype_immed;
|
||||
|
||||
assign lsu_req_if.mem_read = bckE_req_if.mem_read;
|
||||
assign lsu_req_if.mem_write = bckE_req_if.mem_write;
|
||||
assign lsu_req_if.rd = bckE_req_if.rd;
|
||||
assign lsu_req_if.wb = bckE_req_if.wb;
|
||||
assign lsu_req_if.curr_PC = bckE_req_if.curr_PC;
|
||||
|
||||
// Execute Unit
|
||||
assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign exec_unit_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC;
|
||||
assign exec_unit_req_if.next_PC = bckE_req_if.next_PC;
|
||||
assign exec_unit_req_if.rd = bckE_req_if.rd;
|
||||
assign exec_unit_req_if.wb = bckE_req_if.wb;
|
||||
assign exec_unit_req_if.a_reg_data = gpr_read_if.a_reg_data;
|
||||
assign exec_unit_req_if.b_reg_data = gpr_read_if.b_reg_data;
|
||||
assign exec_unit_req_if.alu_op = bckE_req_if.alu_op;
|
||||
assign exec_unit_req_if.rs1 = bckE_req_if.rs1;
|
||||
assign exec_unit_req_if.rs2 = bckE_req_if.rs2;
|
||||
assign exec_unit_req_if.rs2_src = bckE_req_if.rs2_src;
|
||||
assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
|
||||
assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
|
||||
assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
|
||||
assign exec_unit_req_if.is_jal = bckE_req_if.is_jal;
|
||||
assign exec_unit_req_if.jal = bckE_req_if.jal;
|
||||
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
|
||||
assign exec_unit_req_if.is_etype = bckE_req_if.is_etype;
|
||||
|
||||
// GPU Req
|
||||
assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
|
||||
assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign gpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn;
|
||||
assign gpu_inst_req_if.is_tmc = bckE_req_if.is_tmc;
|
||||
assign gpu_inst_req_if.is_split = bckE_req_if.is_split;
|
||||
assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
|
||||
assign gpu_inst_req_if.a_reg_data = gpr_read_if.a_reg_data;
|
||||
assign gpu_inst_req_if.rd2 = gpr_read_if.b_reg_data[0];
|
||||
assign gpu_inst_req_if.next_PC = bckE_req_if.next_PC;
|
||||
|
||||
// CSR Req
|
||||
assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask;
|
||||
assign csr_req_if.warp_num = bckE_req_if.warp_num;
|
||||
assign csr_req_if.rd = bckE_req_if.rd;
|
||||
assign csr_req_if.wb = bckE_req_if.wb;
|
||||
assign csr_req_if.alu_op = bckE_req_if.alu_op;
|
||||
assign csr_req_if.is_csr = bckE_req_if.is_csr;
|
||||
assign csr_req_if.csr_addr = bckE_req_if.csr_addr;
|
||||
assign csr_req_if.csr_immed = bckE_req_if.csr_immed;
|
||||
assign csr_req_if.csr_mask = bckE_req_if.csr_mask;
|
||||
|
||||
endmodule
|
87
hw/rtl/VX_issue.v
Normal file
87
hw/rtl/VX_issue.v
Normal file
|
@ -0,0 +1,87 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Issue stage.
//
// Tracks, per warp and per architectural register, which threads still have
// a write outstanding (rename_table) and holds back the decoded instruction
// while any register it uses is still pending or while the execution unit
// selected by decode_if.ex_type is not ready. Instructions that are not held
// back are forwarded to execute_if through a pipeline register.
//
// Ports:
//   decode_if    - decoded instruction in; this module drives decode_if.ready.
//   writeback_if - observed to release pending destination registers.
//   execute_if   - issued instruction out; its per-unit *_ready flags are read.
//   is_empty     - high when no destination register is pending.
module VX_issue #(
    parameter CORE_ID = 0
) (
    input wire      clk,
    input wire      reset,

    VX_decode_if    decode_if,
    VX_wb_if        writeback_if,

    VX_execute_if   execute_if,

    output wire     is_empty
);
    // Wide enough to count every (warp, register) entry plus zero.
    localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);

    // rename_table[warp][reg] is the mask of threads whose write to that
    // register has been issued but not yet written back.
    reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];

    // Number of pending destination registers, updated on acquire/release.
    reg [CTVW-1:0] count_valid;

    wire rs1_rename = (rename_table[decode_if.warp_num][decode_if.rs1] != 0);
    wire rs2_rename = (rename_table[decode_if.warp_num][decode_if.rs2] != 0);
    wire rd_rename  = (rename_table[decode_if.warp_num][decode_if.rd ] != 0);

    // A pending register only matters if the instruction actually uses it.
    wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
    wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
    wire rd_rename_qual  = (rd_rename)  && (decode_if.wb != 0);

    wire rename_valid = (| decode_if.valid) && (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);

    // The unit targeted by the incoming instruction cannot accept it.
    wire ex_stalled = (| decode_if.valid)
                   && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU))
                    || (!execute_if.br_ready  && (decode_if.ex_type == `EX_BR))
                    || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU))
                    || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR))
                    || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL))
                    || (!execute_if.gpu_ready && (decode_if.ex_type == `EX_GPU)));

    wire stall = rename_valid || ex_stalled;

    // Mark rd as pending when a register-writing instruction is issued.
    wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && (decode_if.rd != 0) && ~stall;

    // Clear pending threads when a register write is committed.
    wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);

    wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;

    // The writeback clears the last pending thread of its register.
    wire release_last = release_rd && (0 == valid_wb_new_mask);

    // Must be a net: a variable declaration initializer would be evaluated
    // only once at time zero and the counter would never follow
    // acquire/release activity.
    wire [CTVW-1:0] count_valid_next = (acquire_rd && !release_last)  ? (count_valid + 1) :
                                       (~acquire_rd && release_last) ? (count_valid - 1) :
                                                                        count_valid;

    integer i, w;

    always @(posedge clk) begin
        if (reset) begin
            for (w = 0; w < `NUM_WARPS; w++) begin
                for (i = 0; i < 32; i++) begin
                    rename_table[w][i] <= 0;
                end
            end
            count_valid <= 0;
        end else begin
            if (acquire_rd) begin
                rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
            end
            if (release_rd) begin
                assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
                rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
            end
            count_valid <= count_valid_next;
        end
    end

    // NOTE(review): while `stall` is high this register presumably keeps
    // presenting its previous contents (including valid) on execute_if;
    // confirm against VX_generic_register that the execute units cannot
    // consume the same instruction twice.
    VX_generic_register #(
        .N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
    ) schedule_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (1'b0),
        .in    ({decode_if.valid,  decode_if.warp_num,  decode_if.curr_PC,  decode_if.next_PC,  decode_if.rd,  decode_if.rs1,  decode_if.rs2,  decode_if.imm,  decode_if.rs1_is_PC,  decode_if.rs2_is_imm,  decode_if.ex_type,  decode_if.instr_op,  decode_if.wb}),
        .out   ({execute_if.valid, execute_if.warp_num, execute_if.curr_PC, execute_if.next_PC, execute_if.rd, execute_if.rs1, execute_if.rs2, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.ex_type, execute_if.instr_op, execute_if.wb})
    );

    assign decode_if.ready = ~stall;

    assign is_empty = (0 == count_valid);

endmodule
|
|
@ -5,23 +5,19 @@ module VX_lsu_unit #(
|
|||
) (
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire no_slot_mem,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
|
||||
// Write back to GPR
|
||||
VX_wb_if mem_wb_if,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_cache_core_req_if dcache_req_if,
|
||||
VX_cache_core_rsp_if dcache_rsp_if,
|
||||
|
||||
output wire delay
|
||||
);
|
||||
// inputs
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
|
||||
VX_wb_if mem_wb_unqual_if();
|
||||
// outputs
|
||||
VX_wb_if lsu_wb_if
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0] use_valid;
|
||||
wire use_req_rw;
|
||||
|
@ -29,28 +25,25 @@ module VX_lsu_unit #(
|
|||
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
||||
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
||||
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
||||
wire [`BYTE_EN_BITS-1:0] use_mem_read;
|
||||
wire [4:0] use_rd;
|
||||
wire [`BYTEEN_BITS-1:0] mem_byteen;
|
||||
wire [`NR_BITS-1:0] use_rd;
|
||||
wire [`NW_BITS-1:0] use_warp_num;
|
||||
wire [1:0] use_wb;
|
||||
wire [`WB_BITS-1:0] use_wb;
|
||||
wire [31:0] use_pc;
|
||||
|
||||
genvar i;
|
||||
|
||||
// Generate Full Addresses
|
||||
wire[`NUM_THREADS-1:0][31:0] full_address;
|
||||
wire [`NUM_THREADS-1:0][31:0] full_address;
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
wire mem_req_rw = (lsu_req_if.mem_write != `BYTE_EN_NO);
|
||||
|
||||
reg [3:0] wmask;
|
||||
always @(*) begin
|
||||
case ((mem_req_rw ? lsu_req_if.mem_write[1:0] : lsu_req_if.mem_read[1:0]))
|
||||
0: wmask = 4'b0001;
|
||||
1: wmask = 4'b0011;
|
||||
default : wmask = 4'b1111;
|
||||
case (lsu_req_if.byteen)
|
||||
0: wmask = 4'b0001;
|
||||
1: wmask = 4'b0011;
|
||||
default: wmask = 4'b1111;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -64,29 +57,32 @@ module VX_lsu_unit #(
|
|||
assign mem_req_offset[i] = full_address[i][1:0];
|
||||
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
|
||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||
end
|
||||
end
|
||||
|
||||
// Can accept new request
|
||||
wire stall = ~dcache_req_if.ready || mrq_full;
|
||||
assign lsu_req_if.ready = ~stall;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire[`NUM_THREADS-1:0][31:0] use_address;
|
||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
VX_generic_register #(
|
||||
.N((`NUM_THREADS * 1) + (`NUM_THREADS * 32) + `BYTE_EN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + 5 + `NW_BITS + 2 + 32)
|
||||
) lsu_buffer (
|
||||
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + `WB_BITS + 32)
|
||||
) mem_req_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (delay),
|
||||
.flush (1'b0),
|
||||
.in ({lsu_req_if.valid, full_address, lsu_req_if.mem_read, mem_req_rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.out ({use_valid , use_address, use_mem_read , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc})
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({lsu_req_if.valid, full_address, lsu_req_if.byteen, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.out ({use_valid , use_address, mem_byteen , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc})
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
||||
wire [`BYTE_EN_BITS-1:0] core_rsp_mem_read;
|
||||
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
||||
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
||||
wire mrq_full;
|
||||
|
||||
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
||||
|
@ -94,25 +90,25 @@ module VX_lsu_unit #(
|
|||
|
||||
wire mrq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
assign mrq_read_addr = dcache_rsp_if.tag[0][`LOG2UP(`DCREQ_SIZE)-1:0];
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_read_addr = dcache_rsp_if.tag[0][`LOG2UP(`DCREQ_SIZE)-1:0];
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[mrq_read_addr] & ~dcache_rsp_if.valid;
|
||||
|
||||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
|
||||
|
||||
VX_indexable_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 2 + (`NUM_THREADS * 2) + `BYTE_EN_BITS + 5 + `NW_BITS),
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + `WB_BITS + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, use_mem_read, use_rd, use_warp_num}),
|
||||
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num}),
|
||||
.read_data ({dbg_mrq_write_addr, lsu_wb_if.curr_PC, lsu_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_wb_if.rd, lsu_wb_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
|
@ -127,7 +123,6 @@ module VX_lsu_unit #(
|
|||
end
|
||||
|
||||
// Core Request
|
||||
|
||||
assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}};
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
||||
assign dcache_req_if.byteen = use_req_byteen;
|
||||
|
@ -140,43 +135,27 @@ module VX_lsu_unit #(
|
|||
assign dcache_req_if.tag = mrq_write_addr;
|
||||
`endif
|
||||
|
||||
// Can't accept new request
|
||||
assign delay = mrq_full || !dcache_req_if.ready;
|
||||
|
||||
// Core Response
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] core_rsp_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rsp_data_shifted;
|
||||
reg [`NUM_THREADS-1:0][31:0] core_rsp_data;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign rsp_data_shifted[i] = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
||||
wire [15:0] rsp_data_shifted = 16'(dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0});
|
||||
always @(*) begin
|
||||
case (core_rsp_mem_read)
|
||||
`BYTE_EN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[i][7]}}, rsp_data_shifted[i][7:0]};
|
||||
`BYTE_EN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[i][15]}}, rsp_data_shifted[i][15:0]};
|
||||
`BYTE_EN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[i][7:0]);
|
||||
`BYTE_EN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[i][15:0]);
|
||||
default : core_rsp_data[i] = rsp_data_shifted[i];
|
||||
`BYTEEN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||
`BYTEEN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[7:0]);
|
||||
`BYTEEN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||
`BYTEEN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[15:0]);
|
||||
default: core_rsp_data[i] = dcache_rsp_if.data[i];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_wb_unqual_if.valid = dcache_rsp_if.valid;
|
||||
assign mem_wb_unqual_if.data = core_rsp_data;
|
||||
assign lsu_wb_if.valid = dcache_rsp_if.valid;
|
||||
assign lsu_wb_if.data = core_rsp_data;
|
||||
|
||||
// Can't accept new response
|
||||
assign dcache_rsp_if.ready = !(no_slot_mem & (|mem_wb_if.valid));
|
||||
|
||||
// From LSU to WB
|
||||
localparam WB_REQ_SIZE = (`NUM_THREADS) + (`NUM_THREADS * 32) + (`NW_BITS) + (5) + (2) + 32;
|
||||
VX_generic_register #(.N(WB_REQ_SIZE)) lsu_to_wb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (no_slot_mem),
|
||||
.flush (1'b0),
|
||||
.in ({mem_wb_unqual_if.valid, mem_wb_unqual_if.data, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.rd, mem_wb_unqual_if.wb, mem_wb_unqual_if.curr_PC}),
|
||||
.out ({mem_wb_if.valid, mem_wb_if.data, mem_wb_if.warp_num, mem_wb_if.rd, mem_wb_if.wb, mem_wb_if.curr_PC})
|
||||
);
|
||||
// Can accept new cache response
|
||||
assign dcache_rsp_if.ready = lsu_wb_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||
|
@ -196,12 +175,12 @@ module VX_lsu_unit #(
|
|||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||
$display("%t: D%0d$ req: valid=%b, addr=%0h, tag=%0h, rw=%0b, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_req_rw, use_pc, use_rd, use_warp_num, use_req_byteen, use_req_data);
|
||||
$display("%t: D$%0d req: valid=%b, warp=%0d, PC=%0h, addr=%0h, tag=%0h, rw=%0b, rd=%0d, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_valid, use_warp_num, use_pc, use_address, mrq_write_addr, use_req_rw, use_rd, use_req_byteen, use_req_data);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
$display("%t: D%0d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h",
|
||||
$time, CORE_ID, mem_wb_unqual_if.valid, mrq_read_addr, mem_wb_unqual_if.curr_PC, mem_wb_unqual_if.rd, mem_wb_unqual_if.warp_num, mem_wb_unqual_if.data);
|
||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
$time, CORE_ID, lsu_wb_if.valid, lsu_wb_if.warp_num, lsu_wb_if.curr_PC, mrq_read_addr, lsu_wb_if.rd, lsu_wb_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -40,18 +40,20 @@ module VX_mem_unit # (
|
|||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS)
|
||||
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
|
||||
|
||||
// select shared memory address
|
||||
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
|
||||
wire smem_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
|
||||
// select shared memory bus
|
||||
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
|
||||
wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
|
||||
wire smem_rsp_select = (| core_smem_rsp_if.valid);
|
||||
|
||||
VX_dcache_arb dcache_smem_arb (
|
||||
.req_select (smem_select),
|
||||
.in_core_req_if (core_dcache_req_if),
|
||||
.out0_core_req_if (core_dcache_req_qual_if),
|
||||
.out1_core_req_if (core_smem_req_if),
|
||||
.in0_core_rsp_if (core_dcache_rsp_qual_if),
|
||||
.in1_core_rsp_if (core_smem_rsp_if),
|
||||
.out_core_rsp_if (core_dcache_rsp_if)
|
||||
VX_dcache_arb dcache_smem_arb (
|
||||
.core_req_in_if (core_dcache_req_if),
|
||||
.core_req_out0_if (core_dcache_req_qual_if),
|
||||
.core_req_out1_if (core_smem_req_if),
|
||||
.core_rsp_in0_if (core_dcache_rsp_qual_if),
|
||||
.core_rsp_in1_if (core_smem_rsp_if),
|
||||
.core_rsp_out_if (core_dcache_rsp_if),
|
||||
.select_req (smem_req_select),
|
||||
.select_rsp (smem_rsp_select)
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
|
|
123
hw/rtl/VX_mul_unit.v
Normal file
123
hw/rtl/VX_mul_unit.v
Normal file
|
@ -0,0 +1,123 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Multiply / divide execution unit.
//
// For every thread lane a pipelined 33x33 signed multiplier and a 33-bit
// signed divider run in parallel; operands are sign- or zero-extended to
// 33 bits depending on the opcode so one signed datapath serves both the
// signed and unsigned variants. A counter waits for the latency of the
// selected operation before the result is latched and handed to mul_wb_if.
//
// Ports:
//   mul_req_if - request in (mul_op, rs1_data, rs2_data, valid, warp_num,
//                curr_PC, rd, wb); this module drives mul_req_if.ready.
//   mul_wb_if  - result out; mul_wb_if.ready is read for back-pressure.
module VX_mul_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // Inputs
    VX_mul_req_if   mul_req_if,

    // Outputs
    VX_wb_if        mul_wb_if
);
    // Assigned from an always block below, so it must be a variable.
    reg  [`NUM_THREADS-1:0][31:0] alu_result;
    wire [`NUM_THREADS-1:0][63:0] mul_result;
    wire [`NUM_THREADS-1:0][31:0] div_result;
    wire [`NUM_THREADS-1:0][31:0] rem_result;

    wire [`MUL_BITS-1:0]          alu_op  = mul_req_if.mul_op;
    wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
    wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;

    genvar i;

    for (i = 0; i < `NUM_THREADS; i++) begin

        // Bit 32 carries the sign only for the operand(s) the opcode treats
        // as signed; otherwise the operand is zero-extended.
        wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
        wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};

        wire [32:0] div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
        wire [32:0] div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};

        VX_mult #(
            .WIDTHA(33),
            .WIDTHB(33),
            .WIDTHP(64),
            .SIGNED(1),
            .PIPELINE(`MUL_LATENCY)
        ) multiplier (
            .clk(clk),
            .reset(reset),
            .dataa(mul_in1),
            .datab(mul_in2),
            .result(mul_result[i])
        );

        VX_divide #(
            .WIDTHN(33),
            .WIDTHD(33),
            .WIDTHQ(32),
            .WIDTHR(32),
            .NSIGNED(1),
            .DSIGNED(1),
            .PIPELINE(`DIV_LATENCY)
        ) sdiv (
            .clk(clk),
            .reset(reset),
            .numer(div_in1),
            .denom(div_in2),
            .quotient(div_result[i]),
            .remainder(rem_result[i])
        );

        always @(*) begin
            case (alu_op)
                `MUL_MUL:    alu_result[i] = mul_result[i][31:0];
                `MUL_MULH,
                `MUL_MULHSU,
                `MUL_MULHU:  alu_result[i] = mul_result[i][63:32];
                // Division by zero: quotient is all ones.
                `MUL_DIV,
                `MUL_DIVU:   alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
                // Division by zero: remainder is the dividend. The test must
                // look at this lane's divisor only, not the whole vector.
                `MUL_REM,
                `MUL_REMU:   alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i];
                // Fallback for opcodes not listed above.
                default:     alu_result[i] = alu_in1[i] + alu_in2[i];
            endcase
        end
    end

    reg       result_avail;
    reg [4:0] pending_ctr;
    wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;

    // Hold the request until the counter reports the result is available.
    wire pipeline_stall = ~result_avail && (| mul_req_if.valid);

    // Also hold while the previous result has not been accepted downstream.
    // Declared ahead of the sequential block that reads it.
    wire stall = (~mul_wb_if.ready && (| mul_wb_if.valid))
              || pipeline_stall;

    // Latency counter: loaded with (latency - 1) when a request is first
    // seen, counts down each cycle, and raises result_avail on reaching zero.
    //
    // NOTE(review): in the cycle a result is consumed (result_avail && !stall)
    // the request is still valid and pending_ctr is 0, so the second `if`
    // below overrides the clear and restarts the counter. A request arriving
    // on the following cycle may then see result_avail one cycle early.
    // Behaviour is left unchanged here; confirm against VX_mult/VX_divide
    // timing and the upstream handshake.
    always @(posedge clk) begin
        if (reset) begin
            result_avail <= 0;
            pending_ctr  <= 0;
        end else begin
            if (result_avail && !stall) begin
                result_avail <= 0;
                pending_ctr  <= 0;
            end
            if ((| mul_req_if.valid) && (pending_ctr == 0)) begin
                pending_ctr <= instr_delay - 1;
                if (instr_delay == 1)
                    result_avail <= 1;
            end else if (pending_ctr != 0) begin
                pending_ctr <= pending_ctr - 1;
                if (pending_ctr == 1)
                    result_avail <= 1;
            end
        end
    end

    VX_generic_register #(
        .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
    ) mul_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (1'b0),
        .in    ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
        .out   ({mul_wb_if.valid,  mul_wb_if.warp_num,  mul_wb_if.curr_PC,  mul_wb_if.rd,  mul_wb_if.wb,  mul_wb_if.data})
    );

    assign mul_req_if.ready = ~stall;

endmodule
|
|
@ -58,16 +58,6 @@ module VX_pipeline #(
|
|||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire scheduler_empty;
|
||||
`DEBUG_END
|
||||
|
||||
wire memory_delay;
|
||||
wire exec_delay;
|
||||
wire gpr_stage_delay;
|
||||
wire schedule_delay;
|
||||
|
||||
// Dcache
|
||||
VX_cache_core_req_if #(
|
||||
.NUM_REQUESTS(`NUM_THREADS),
|
||||
|
@ -98,7 +88,6 @@ module VX_pipeline #(
|
|||
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
|
||||
) core_icache_rsp_if();
|
||||
|
||||
|
||||
// CSR I/O
|
||||
VX_csr_io_req_if csr_io_req_if();
|
||||
assign csr_io_req_if.valid = csr_io_req_valid;
|
||||
|
@ -112,69 +101,95 @@ module VX_pipeline #(
|
|||
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
||||
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
||||
|
||||
// Front-end to Back-end
|
||||
VX_backend_req_if bckE_req_if();
|
||||
|
||||
// Back-end to Front-end
|
||||
VX_wb_if writeback_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_execute_if execute_if();
|
||||
VX_branch_rsp_if branch_rsp_if();
|
||||
VX_jal_rsp_if jal_rsp_if();
|
||||
|
||||
// Warp controls
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_wb_if writeback_if();
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
VX_wb_if alu_wb_if();
|
||||
VX_wb_if branch_wb_if();
|
||||
VX_wb_if lsu_wb_if();
|
||||
VX_wb_if csr_wb_if();
|
||||
VX_wb_if mul_wb_if();
|
||||
|
||||
VX_front_end #(
|
||||
wire notify_commit;
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) front_end (
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
|
||||
) fetch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_rsp_if (core_icache_rsp_if),
|
||||
.icache_req_if (core_icache_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.icache_rsp_if (core_icache_rsp_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
VX_scheduler scheduler (
|
||||
VX_decode #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.decode_if (decode_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if)
|
||||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) issue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.execute_if (execute_if),
|
||||
`UNUSED_PIN (is_empty)
|
||||
);
|
||||
|
||||
VX_execute #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dcache_req_if (core_dcache_req_if),
|
||||
.dcache_rsp_if (core_dcache_rsp_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.execute_if (execute_if),
|
||||
.writeback_if (writeback_if),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.alu_wb_if (alu_wb_if),
|
||||
.branch_wb_if (branch_wb_if),
|
||||
.lsu_wb_if (lsu_wb_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
.mul_wb_if (mul_wb_if),
|
||||
.notify_commit (notify_commit),
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) writeback (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay(gpr_stage_delay),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.alu_wb_if (alu_wb_if),
|
||||
.branch_wb_if (branch_wb_if),
|
||||
.lsu_wb_if (lsu_wb_if),
|
||||
.csr_wb_if (csr_wb_if),
|
||||
.mul_wb_if (mul_wb_if),
|
||||
.writeback_if (writeback_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) back_end (
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.dcache_req_if (core_dcache_req_if),
|
||||
.dcache_rsp_if (core_dcache_rsp_if),
|
||||
.writeback_if (writeback_if),
|
||||
.mem_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
.notify_commit (notify_commit)
|
||||
);
|
||||
|
||||
assign dcache_req_valid = core_dcache_req_if.valid;
|
||||
assign dcache_req_rw = core_dcache_req_if.rw;
|
||||
|
@ -204,17 +219,14 @@ module VX_pipeline #(
|
|||
|
||||
`SCOPE_ASSIGN(scope_busy, busy);
|
||||
`SCOPE_ASSIGN(scope_schedule_delay, schedule_delay);
|
||||
`SCOPE_ASSIGN(scope_memory_delay, memory_delay);
|
||||
`SCOPE_ASSIGN(scope_mem_delay, mem_delay);
|
||||
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
|
||||
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
|
||||
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
|
||||
$display("%t: Core%0d-WB: warp=%0d, rd=%0d, data=%0h", $time, CORE_ID, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
|
||||
end
|
||||
if (schedule_delay || memory_delay || exec_delay || gpr_stage_delay) begin
|
||||
$display("%t: Core%0d-Delay: sched=%b, mem=%b, exec=%b, gpr=%b ", $time, CORE_ID, schedule_delay, memory_delay, exec_delay, gpr_stage_delay);
|
||||
if ((| execute_if.valid) && (~execute_if.alu_ready || ~execute_if.br_ready || ~execute_if.lsu_ready || ~execute_if.csr_ready || ~execute_if.mul_ready || ~execute_if.gpu_ready)) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, alu=%b, br=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, ~execute_if.alu_ready, ~execute_if.br_ready, ~execute_if.lsu_ready, ~execute_if.csr_ready, ~execute_if.mul_ready, ~execute_if.gpu_ready);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Scheduler (scoreboard).
//
// Tracks, per warp and per architectural register, which threads still have
// a write outstanding (rename_table) and asserts schedule_delay while the
// incoming back-end request uses a pending register or while the unit class
// it targets reports a delay.
//
// Ports:
//   memory_delay / exec_delay / gpr_stage_delay - delay flags sampled here.
//   bckE_req_if    - incoming back-end request (read only in this module).
//   writeback_if   - observed to release pending destination registers.
//   schedule_delay - high when the incoming request must be held.
//   is_empty       - high when no destination register is pending.
module VX_scheduler (
    input wire          clk,
    input wire          reset,
    input wire          memory_delay,
    input wire          exec_delay,
    input wire          gpr_stage_delay,

    VX_backend_req_if   bckE_req_if,
    VX_wb_if            writeback_if,

    output wire         schedule_delay,
    output wire         is_empty
);
    // Wide enough to count every (warp, register) entry plus zero.
    localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);

    // rename_table[warp][reg] is the mask of threads whose write to that
    // register has been issued but not yet written back.
    reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];

    // Number of pending destination registers, updated on acquire/release.
    reg [CTVW-1:0] count_valid;

    // Classify the incoming request by the unit that will handle it.
    wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
    wire is_load  = (bckE_req_if.mem_read  != `BYTE_EN_NO);
    wire is_mem   = (is_store || is_load);
    wire is_gpu   = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
    wire is_csr   = bckE_req_if.is_csr;
    wire is_exec  = !is_mem && !is_gpu && !is_csr;

    wire using_rs2 = is_store
                  || (bckE_req_if.rs2_src == `RS2_REG)
                  || bckE_req_if.is_barrier
                  || bckE_req_if.is_wspawn;

    wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
    wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
    wire rd_rename  = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);

    // Register 0 never counts as pending; rs2 only when it is actually read.
    wire rs1_rename_qual = (rs1_rename) && (bckE_req_if.rs1 != 0);
    wire rs2_rename_qual = (rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2);
    wire rd_rename_qual  = (rd_rename)  && (bckE_req_if.rd  != 0);

    wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;

    assign schedule_delay = (| bckE_req_if.valid)
                         && ((rename_valid)
                          || (memory_delay && is_mem)
                          || (gpr_stage_delay && (is_mem || is_exec))
                          || (exec_delay && is_exec));

    assign is_empty = (count_valid == 0);

    integer i, w;

    // Mark rd as pending when a register-writing request is let through.
    wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay;

    // Clear pending threads when a register write is committed.
    wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);

    wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;

    // The writeback clears the last pending thread of its register.
    wire release_last = release_rd && (0 == valid_wb_new_mask);

    // Must be a net: a variable declaration initializer would be evaluated
    // only once at time zero and the counter would never follow
    // acquire/release activity.
    wire [CTVW-1:0] count_valid_next = (acquire_rd && !release_last)  ? (count_valid + 1) :
                                       (~acquire_rd && release_last) ? (count_valid - 1) :
                                                                        count_valid;

    always @(posedge clk) begin
        if (reset) begin
            for (w = 0; w < `NUM_WARPS; w++) begin
                for (i = 0; i < 32; i++) begin
                    rename_table[w][i] <= 0;
                end
            end
            count_valid <= 0;
        end else begin
            if (acquire_rd) begin
                rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
            end
            if (release_rd) begin
                assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
                rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
            end
            count_valid <= count_valid_next;
        end
    end

endmodule
|
|
@ -74,7 +74,7 @@
|
|||
scope_execute_valid, \
|
||||
scope_writeback_valid, \
|
||||
scope_schedule_delay, \
|
||||
scope_memory_delay, \
|
||||
scope_mem_delay, \
|
||||
scope_exec_delay, \
|
||||
scope_gpr_stage_delay, \
|
||||
scope_busy, \
|
||||
|
@ -127,26 +127,26 @@
|
|||
wire scope_busy; \
|
||||
wire scope_snp_rsp_ready; \
|
||||
wire scope_schedule_delay; \
|
||||
wire scope_memory_delay; \
|
||||
wire scope_mem_delay; \
|
||||
wire scope_exec_delay; \
|
||||
wire scope_gpr_stage_delay; \
|
||||
wire [`NUM_THREADS-1:0] scope_decode_valid; \
|
||||
wire [`NW_BITS-1:0] scope_decode_warp_num; \
|
||||
wire [31:0] scope_decode_curr_PC; \
|
||||
wire scope_decode_is_jal; \
|
||||
wire [4:0] scope_decode_rs1; \
|
||||
wire [4:0] scope_decode_rs2; \
|
||||
wire [`NR_BITS-1:0] scope_decode_rs1; \
|
||||
wire [`NR_BITS-1:0] scope_decode_rs2; \
|
||||
wire [`NUM_THREADS-1:0] scope_execute_valid; \
|
||||
wire [`NW_BITS-1:0] scope_execute_warp_num; \
|
||||
wire [31:0] scope_execute_curr_PC; \
|
||||
wire [4:0] scope_execute_rd; \
|
||||
wire [`NR_BITS-1:0] scope_execute_rd; \
|
||||
wire [63:0] scope_execute_a; \
|
||||
wire [63:0] scope_execute_b; \
|
||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
||||
wire [31:0] scope_writeback_curr_PC; \
|
||||
wire [1:0] scope_writeback_wb; \
|
||||
wire [4:0] scope_writeback_rd; \
|
||||
wire [`WB_BITS-1:0] scope_writeback_wb; \
|
||||
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
||||
wire [63:0] scope_writeback_data; \
|
||||
wire scope_bank_valid_st0; \
|
||||
wire scope_bank_valid_st1; \
|
||||
|
@ -204,7 +204,7 @@
|
|||
`define SCOPE_SIGNALS_PIPELINE_IO \
|
||||
output wire scope_busy, \
|
||||
output wire scope_schedule_delay, \
|
||||
output wire scope_memory_delay, \
|
||||
output wire scope_mem_delay, \
|
||||
output wire scope_exec_delay, \
|
||||
output wire scope_gpr_stage_delay,
|
||||
|
||||
|
@ -213,19 +213,19 @@
|
|||
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
|
||||
output wire [31:0] scope_decode_curr_PC, \
|
||||
output wire scope_decode_is_jal, \
|
||||
output wire [4:0] scope_decode_rs1, \
|
||||
output wire [4:0] scope_decode_rs2, \
|
||||
output wire [`NR_BITS-1:0] scope_decode_rs1, \
|
||||
output wire [`NR_BITS-1:0] scope_decode_rs2, \
|
||||
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
|
||||
output wire [31:0] scope_execute_curr_PC, \
|
||||
output wire [4:0] scope_execute_rd, \
|
||||
output wire [`NR_BITS-1:0] scope_execute_rd, \
|
||||
output wire [63:0] scope_execute_a, \
|
||||
output wire [63:0] scope_execute_b, \
|
||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
||||
output wire [31:0] scope_writeback_curr_PC, \
|
||||
output wire [1:0] scope_writeback_wb, \
|
||||
output wire [4:0] scope_writeback_rd, \
|
||||
output wire [`WB_BITS-1:0] scope_writeback_wb, \
|
||||
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
||||
output wire [63:0] scope_writeback_data,
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_BIND \
|
||||
|
@ -326,7 +326,7 @@
|
|||
`define SCOPE_SIGNALS_PIPELINE_BIND \
|
||||
.scope_busy (scope_busy), \
|
||||
.scope_schedule_delay (scope_schedule_delay), \
|
||||
.scope_memory_delay (scope_memory_delay), \
|
||||
.scope_mem_delay (scope_mem_delay), \
|
||||
.scope_exec_delay (scope_exec_delay), \
|
||||
.scope_gpr_stage_delay (scope_gpr_stage_delay),
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ module VX_warp (
|
|||
input wire change_mask,
|
||||
input wire jal,
|
||||
input wire[31:0] dest,
|
||||
input wire branch_dir,
|
||||
input wire branch_taken,
|
||||
input wire[31:0] branch_dest,
|
||||
input wire wspawn,
|
||||
input wire[31:0] wspawn_pc,
|
||||
|
@ -44,7 +44,7 @@ module VX_warp (
|
|||
always @(*) begin
|
||||
if (jal == 1'b1) begin
|
||||
temp_PC = dest;
|
||||
end else if (branch_dir) begin
|
||||
end else if (branch_taken) begin
|
||||
temp_PC = branch_dest;
|
||||
end else begin
|
||||
temp_PC = real_PC;
|
||||
|
|
|
@ -1,90 +1,38 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_sched (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
module VX_warp_sched #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Wspawn
|
||||
input wire wspawn,
|
||||
input wire[31:0] wsapwn_pc,
|
||||
input wire[`NUM_WARPS-1:0] wspawn_new_active,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
||||
// CTM
|
||||
input wire ctm,
|
||||
input wire[`NUM_THREADS-1:0] ctm_mask,
|
||||
input wire[`NW_BITS-1:0] ctm_warp_num,
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
|
||||
// WHALT
|
||||
input wire whalt,
|
||||
input wire[`NW_BITS-1:0] whalt_warp_num,
|
||||
|
||||
input wire is_barrier,
|
||||
`DEBUG_BEGIN
|
||||
input wire[31:0] barrier_id,
|
||||
`DEBUG_END
|
||||
input wire[$clog2(`NUM_WARPS):0] num_warps,
|
||||
input wire[`NW_BITS-1:0] barrier_warp_num,
|
||||
|
||||
// WSTALL
|
||||
input wire wstall,
|
||||
input wire [`NW_BITS-1:0] wstall_warp_num,
|
||||
|
||||
// Split
|
||||
input wire is_split,
|
||||
input wire dont_split,
|
||||
input wire [`NUM_THREADS-1:0] split_new_mask,
|
||||
input wire [`NUM_THREADS-1:0] split_later_mask,
|
||||
input wire [31:0] split_save_pc,
|
||||
input wire [`NW_BITS-1:0] split_warp_num,
|
||||
|
||||
// Join
|
||||
input wire is_join,
|
||||
input wire [`NW_BITS-1:0] join_warp_num,
|
||||
|
||||
// JAL
|
||||
input wire jal,
|
||||
input wire [31:0] dest,
|
||||
input wire [`NW_BITS-1:0] jal_warp_num,
|
||||
|
||||
// Branch
|
||||
input wire branch_valid,
|
||||
input wire branch_dir,
|
||||
input wire [31:0] branch_dest,
|
||||
input wire [`NW_BITS-1:0] branch_warp_num,
|
||||
|
||||
output wire [`NUM_THREADS-1:0] thread_mask,
|
||||
output wire [`NW_BITS-1:0] warp_num,
|
||||
output wire [31:0] warp_pc,
|
||||
output wire busy,
|
||||
output wire scheduled_warp,
|
||||
|
||||
input wire [`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire icache_stage_response
|
||||
output wire busy
|
||||
);
|
||||
wire update_use_wspawn;
|
||||
wire update_visible_active;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire[31:0] join_pc;
|
||||
wire[`NUM_THREADS-1:0] join_tm;
|
||||
wire join_fall;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire in_wspawn = wspawn;
|
||||
wire in_ctm = ctm;
|
||||
wire in_whalt = whalt;
|
||||
wire in_wstall = wstall;
|
||||
`DEBUG_END
|
||||
|
||||
reg[`NUM_WARPS-1:0] warp_active;
|
||||
reg[`NUM_WARPS-1:0] warp_stalled;
|
||||
reg [`NUM_WARPS-1:0] warp_active;
|
||||
reg [`NUM_WARPS-1:0] warp_stalled;
|
||||
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire[`NUM_WARPS-1:0] use_active;
|
||||
wire [`NUM_WARPS-1:0] use_active;
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
|
||||
wire wstall_this_cycle;
|
||||
|
||||
|
@ -92,17 +40,23 @@ module VX_warp_sched (
|
|||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
|
||||
wire reached_barrier_limit;
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||
wire [`NUM_WARPS-1:0] b_mask;
|
||||
wire [$clog2(`NUM_WARPS):0] b_count;
|
||||
wire [`NW_BITS:0] b_count;
|
||||
|
||||
// wsapwn
|
||||
reg [31:0] use_wsapwn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wsapwn;
|
||||
wire reached_barrier_limit;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
// wspawn
|
||||
reg [31:0] use_wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire hazard;
|
||||
wire global_stall;
|
||||
|
@ -115,15 +69,18 @@ module VX_warp_sched (
|
|||
|
||||
reg didnt_split;
|
||||
|
||||
integer w, b;
|
||||
wire stall;
|
||||
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (b = 0; b < `NUM_BARRIERS; b=b+1) begin
|
||||
barrier_stall_mask[b] <= 0;
|
||||
for (i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
barrier_stall_mask[i] <= 0;
|
||||
end
|
||||
use_wsapwn_pc <= 0;
|
||||
use_wsapwn <= 0;
|
||||
|
||||
use_wspawn_pc <= 0;
|
||||
use_wspawn <= 0;
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
warp_active[0] <= 1; // Activating first warp
|
||||
visible_active[0] <= 1; // Activating first warp
|
||||
|
@ -131,63 +88,62 @@ module VX_warp_sched (
|
|||
warp_stalled <= 0;
|
||||
didnt_split <= 0;
|
||||
warp_lock <= 0;
|
||||
// total_barrier_stall = 0;
|
||||
for (w = 1; w < `NUM_WARPS; w=w+1) begin
|
||||
warp_pcs[w] <= 0;
|
||||
warp_active[w] <= 0; // Activating first warp
|
||||
visible_active[w] <= 0; // Activating first warp
|
||||
thread_masks[w] <= 1; // Activating first thread in first warp
|
||||
|
||||
for (i = 1; i < `NUM_WARPS; i++) begin
|
||||
warp_pcs[i] <= 0;
|
||||
warp_active[i] <= 0; // warps other than warp 0 are cleared (inactive) on reset
|
||||
visible_active[i] <= 0; // likewise cleared in the visible_active mask on reset
|
||||
thread_masks[i] <= 1; // initial mask value 1: only bit 0 (first thread) is set
|
||||
end
|
||||
|
||||
end else begin
|
||||
// Spawning warps (wspawn)
|
||||
if (wspawn) begin
|
||||
warp_active <= wspawn_new_active;
|
||||
use_wsapwn_pc <= wsapwn_pc;
|
||||
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
|
||||
if (warp_ctl_if.wspawn) begin
|
||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
end
|
||||
|
||||
if (is_barrier) begin
|
||||
warp_stalled[barrier_warp_num] <= 0;
|
||||
if (warp_ctl_if.is_barrier) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_stall_mask[barrier_id] <= 0;
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id] <= 0;
|
||||
end else begin
|
||||
barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id][warp_ctl_if.warp_num] <= 1;
|
||||
end
|
||||
end else if (ctm) begin
|
||||
thread_masks[ctm_warp_num] <= ctm_mask;
|
||||
warp_stalled[ctm_warp_num] <= 0;
|
||||
end else if (is_join && !didnt_split) begin
|
||||
end else if (warp_ctl_if.change_mask) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
warp_pcs[join_warp_num] <= join_pc;
|
||||
warp_pcs[join_if.warp_num] <= join_pc;
|
||||
end
|
||||
thread_masks[join_warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (is_split) begin
|
||||
warp_stalled[split_warp_num] <= 0;
|
||||
if (!dont_split) begin
|
||||
thread_masks[split_warp_num] <= split_new_mask;
|
||||
thread_masks[join_if.warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (warp_ctl_if.is_split) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (warp_ctl_if.do_split) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.split_new_mask;
|
||||
didnt_split <= 0;
|
||||
end else begin
|
||||
didnt_split <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (whalt) begin
|
||||
warp_active[whalt_warp_num] <= 0;
|
||||
visible_active[whalt_warp_num] <= 0;
|
||||
if (warp_ctl_if.whalt) begin
|
||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
if (update_use_wspawn) begin
|
||||
use_wsapwn[warp_to_schedule] <= 0;
|
||||
use_wspawn[warp_to_schedule] <= 0;
|
||||
thread_masks[warp_to_schedule] <= 1;
|
||||
end
|
||||
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall) begin
|
||||
warp_stalled[wstall_warp_num] <= 1;
|
||||
visible_active[wstall_warp_num] <= 0;
|
||||
if (wstall_if.wstall) begin
|
||||
warp_stalled[wstall_if.warp_num] <= 1;
|
||||
visible_active[wstall_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Refilling active warps
|
||||
|
@ -201,26 +157,20 @@ module VX_warp_sched (
|
|||
warp_pcs[warp_to_schedule] <= new_pc;
|
||||
end
|
||||
|
||||
// Jal
|
||||
if (jal) begin
|
||||
warp_pcs[jal_warp_num] <= dest;
|
||||
warp_stalled[jal_warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_valid) begin
|
||||
if (branch_dir) begin
|
||||
warp_pcs[branch_warp_num] <= branch_dest;
|
||||
if (branch_rsp_if.valid) begin
|
||||
if (branch_rsp_if.taken) begin
|
||||
warp_pcs[branch_rsp_if.warp_num] <= branch_rsp_if.dest;
|
||||
end
|
||||
warp_stalled[branch_warp_num] <= 0;
|
||||
warp_stalled[branch_rsp_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1'b1;
|
||||
warp_lock[warp_num] <= 1;
|
||||
end
|
||||
if (icache_stage_response) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
if ((| ifetch_rsp_if.valid) && ifetch_rsp_if.ready) begin
|
||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -233,7 +183,7 @@ module VX_warp_sched (
|
|||
.count (b_count)
|
||||
);
|
||||
|
||||
wire [$clog2(`NUM_WARPS):0] count_visible_active;
|
||||
wire [`NW_BITS:0] count_visible_active;
|
||||
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
|
@ -242,30 +192,29 @@ module VX_warp_sched (
|
|||
.count (count_visible_active)
|
||||
);
|
||||
|
||||
// assign b_count = $countones(b_mask);
|
||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
||||
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.num_warps);
|
||||
|
||||
assign b_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
|
||||
assign reached_barrier_limit = b_count == (num_warps);
|
||||
|
||||
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
|
||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
||||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || join_if.is_join);
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split_save_pc, warp_ctl_if.split_later_mask};
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||
assign {join_fall, join_pc, join_tm} = d[join_if.warp_num];
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
|
||||
wire correct_warp_s = (i == split_warp_num);
|
||||
wire correct_warp_j = (i == join_warp_num);
|
||||
genvar j;
|
||||
|
||||
wire push = (is_split && !dont_split) && correct_warp_s;
|
||||
wire pop = is_join && correct_warp_j;
|
||||
for (j = 0; j < `NUM_WARPS; j++) begin : stacks
|
||||
wire correct_warp_s = (j == warp_ctl_if.warp_num);
|
||||
wire correct_warp_j = (j == join_if.warp_num);
|
||||
|
||||
wire push = (warp_ctl_if.is_split && warp_ctl_if.do_split) && correct_warp_s;
|
||||
wire pop = join_if.is_join && correct_warp_j;
|
||||
|
||||
VX_generic_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
|
@ -279,27 +228,27 @@ module VX_warp_sched (
|
|||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
end
|
||||
|
||||
wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
|
||||
wire should_bra = (branch_valid && branch_dir && (warp_to_schedule == branch_warp_num));
|
||||
wire should_bra = (branch_rsp_if.valid && branch_rsp_if.taken && (warp_to_schedule == branch_rsp_if.warp_num));
|
||||
|
||||
assign hazard = (should_jal || should_bra) && schedule;
|
||||
assign hazard = should_bra && schedule;
|
||||
|
||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
||||
|
||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
|
||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || join_if.is_join);
|
||||
|
||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join) && !reset;
|
||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || join_if.is_join) && !reset;
|
||||
|
||||
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
|
||||
wire real_use_wspawn = use_wspawn[warp_to_schedule];
|
||||
|
||||
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
|
||||
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
|
||||
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
|
||||
|
||||
assign warp_num = warp_to_schedule;
|
||||
|
||||
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
|
||||
assign update_use_wspawn = use_wspawn[warp_to_schedule] && !global_stall;
|
||||
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
|
@ -315,13 +264,21 @@ module VX_warp_sched (
|
|||
.grant_index (warp_to_schedule),
|
||||
.grant_valid (schedule),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign stall = ~ifetch_req_if.ready && (| ifetch_req_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + 32 + `NW_BITS)
|
||||
) fetch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({thread_mask, warp_pc, warp_num}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||
);
|
||||
|
||||
// always @(*) begin
|
||||
// $display("WarpPC: %h",warp_pc);
|
||||
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
|
||||
// end
|
||||
|
||||
assign busy = (warp_active != 0);
|
||||
assign busy = (warp_active != 0);
|
||||
|
||||
endmodule
|
|
@ -1,89 +1,112 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_writeback (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
module VX_writeback #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Mem WB info
|
||||
VX_wb_if mem_wb_if,
|
||||
// inputs
|
||||
VX_wb_if alu_wb_if,
|
||||
VX_wb_if branch_wb_if,
|
||||
VX_wb_if lsu_wb_if,
|
||||
VX_wb_if mul_wb_if,
|
||||
VX_wb_if csr_wb_if,
|
||||
|
||||
// EXEC Unit WB info
|
||||
VX_wb_if inst_exec_wb_if,
|
||||
|
||||
// CSR Unit WB info
|
||||
VX_wb_if csr_wb_if,
|
||||
|
||||
// Actual WB to GPR
|
||||
VX_wb_if writeback_if,
|
||||
output wire no_slot_mem,
|
||||
output wire no_slot_exec,
|
||||
output wire no_slot_csr
|
||||
// outputs
|
||||
VX_wb_if writeback_if,
|
||||
output wire notify_commit
|
||||
);
|
||||
|
||||
VX_wb_if writeback_tmp_if();
|
||||
wire br_valid = (| branch_wb_if.valid);
|
||||
wire lsu_valid = (| lsu_wb_if.valid);
|
||||
wire mul_valid = (| mul_wb_if.valid);
|
||||
wire alu_valid = (| alu_wb_if.valid);
|
||||
wire csr_valid = (| csr_wb_if.valid);
|
||||
|
||||
wire exec_wb = (inst_exec_wb_if.wb != 0) && (| inst_exec_wb_if.valid);
|
||||
wire mem_wb = (mem_wb_if.wb != 0) && (| mem_wb_if.valid);
|
||||
wire csr_wb = (csr_wb_if.wb != 0) && (| csr_wb_if.valid);
|
||||
VX_wb_if writeback_tmp_if();
|
||||
|
||||
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
|
||||
assign no_slot_csr = csr_wb && exec_wb;
|
||||
assign no_slot_exec = 0;
|
||||
assign writeback_tmp_if.valid = br_valid ? branch_wb_if.valid :
|
||||
lsu_valid ? lsu_wb_if.valid :
|
||||
mul_valid ? mul_wb_if.valid :
|
||||
alu_valid ? alu_wb_if.valid :
|
||||
csr_valid ? csr_wb_if.valid :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.data = exec_wb ? inst_exec_wb_if.data :
|
||||
csr_wb ? csr_wb_if.data :
|
||||
mem_wb ? mem_wb_if.data :
|
||||
0;
|
||||
assign writeback_tmp_if.warp_num = br_valid ? branch_wb_if.warp_num :
|
||||
lsu_valid ? lsu_wb_if.warp_num :
|
||||
mul_valid ? mul_wb_if.warp_num :
|
||||
alu_valid ? alu_wb_if.warp_num :
|
||||
csr_valid ? csr_wb_if.warp_num :
|
||||
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.valid = exec_wb ? inst_exec_wb_if.valid :
|
||||
csr_wb ? csr_wb_if.valid :
|
||||
mem_wb ? mem_wb_if.valid :
|
||||
0;
|
||||
assign writeback_tmp_if.curr_PC = br_valid ? branch_wb_if.curr_PC :
|
||||
lsu_valid ? lsu_wb_if.curr_PC :
|
||||
mul_valid ? mul_wb_if.curr_PC :
|
||||
alu_valid ? alu_wb_if.curr_PC :
|
||||
csr_valid ? csr_wb_if.curr_PC :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.rd = exec_wb ? inst_exec_wb_if.rd :
|
||||
csr_wb ? csr_wb_if.rd :
|
||||
mem_wb ? mem_wb_if.rd :
|
||||
0;
|
||||
assign writeback_tmp_if.data = br_valid ? branch_wb_if.data :
|
||||
lsu_valid ? lsu_wb_if.data :
|
||||
mul_valid ? mul_wb_if.data :
|
||||
alu_valid ? alu_wb_if.data :
|
||||
csr_valid ? csr_wb_if.data :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.wb = exec_wb ? inst_exec_wb_if.wb :
|
||||
csr_wb ? csr_wb_if.wb :
|
||||
mem_wb ? mem_wb_if.wb :
|
||||
0;
|
||||
assign writeback_tmp_if.rd = br_valid ? branch_wb_if.rd :
|
||||
lsu_valid ? lsu_wb_if.rd :
|
||||
mul_valid ? mul_wb_if.rd :
|
||||
alu_valid ? alu_wb_if.rd :
|
||||
csr_valid ? csr_wb_if.rd :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.warp_num = exec_wb ? inst_exec_wb_if.warp_num :
|
||||
csr_wb ? csr_wb_if.warp_num :
|
||||
mem_wb ? mem_wb_if.warp_num :
|
||||
0;
|
||||
assign writeback_tmp_if.wb = br_valid ? branch_wb_if.wb :
|
||||
lsu_valid ? lsu_wb_if.wb :
|
||||
alu_valid ? alu_wb_if.wb :
|
||||
csr_valid ? csr_wb_if.wb :
|
||||
mul_valid ? mul_wb_if.wb :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.curr_PC = exec_wb ? inst_exec_wb_if.curr_PC :
|
||||
csr_wb ? 32'hdeadbeef :
|
||||
mem_wb ? mem_wb_if.curr_PC :
|
||||
32'hdeadbeef;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] use_wb_data;
|
||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)
|
||||
) wb_register (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(1'b0),
|
||||
.flush(1'b0),
|
||||
.in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC}),
|
||||
.out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.curr_PC})
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + (`NUM_THREADS * 32) + `WB_BITS)
|
||||
) wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.wb}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.rd, writeback_if.data, writeback_if.wb})
|
||||
);
|
||||
|
||||
reg [31:0] last_data_wb /* verilator public */;
|
||||
assign branch_wb_if.ready = !stall;
|
||||
assign lsu_wb_if.ready = !stall && !br_valid;
|
||||
assign mul_wb_if.ready = !stall && !br_valid && !lsu_valid;
|
||||
assign alu_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid;
|
||||
assign csr_wb_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid;
|
||||
|
||||
assign notify_commit = (| writeback_tmp_if.valid) && ~stall;
|
||||
|
||||
// special workaround to control RISC-V benchmarks termination on Verilator
|
||||
reg [31:0] last_data_wb /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if ( (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
|
||||
last_data_wb <= use_wb_data[0];
|
||||
if (notify_commit && (writeback_tmp_if.wb != 0) && (writeback_tmp_if.rd == 28)) begin
|
||||
last_data_wb <= writeback_tmp_if.data[0];
|
||||
end
|
||||
end
|
||||
|
||||
assign writeback_if.data = use_wb_data;
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| writeback_tmp_if.valid) && ~stall) begin
|
||||
$display("%t: Core%0d-WB: warp=%0d, PC=%0h, rd=%0d, wb=%0d, data=%0h", $time, CORE_ID, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule : VX_writeback
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
|
20
hw/rtl/cache/VX_bank.v
vendored
20
hw/rtl/cache/VX_bank.v
vendored
|
@ -106,8 +106,8 @@ module VX_bank #(
|
|||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_use_pc_st0;
|
||||
wire[1:0] debug_wb_st0;
|
||||
wire[4:0] debug_rd_st0;
|
||||
wire[`WB_BITS-1:0] debug_wb_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
||||
wire debug_rw_st0;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st0;
|
||||
|
@ -115,8 +115,8 @@ module VX_bank #(
|
|||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||
|
||||
wire[31:0] debug_use_pc_st1e;
|
||||
wire[1:0] debug_wb_st1e;
|
||||
wire[4:0] debug_rd_st1e;
|
||||
wire[`WB_BITS-1:0] debug_wb_st1e;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
||||
wire debug_rw_st1e;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st1e;
|
||||
|
@ -124,8 +124,8 @@ module VX_bank #(
|
|||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
||||
|
||||
wire[31:0] debug_use_pc_st2;
|
||||
wire[1:0] debug_wb_st2;
|
||||
wire[4:0] debug_rd_st2;
|
||||
wire[`WB_BITS-1:0] debug_wb_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
||||
wire debug_rw_st2;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
||||
|
@ -370,7 +370,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (1'b0),
|
||||
.flush (0),
|
||||
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
|
||||
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
||||
);
|
||||
|
@ -383,7 +383,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (1'b0),
|
||||
.flush (0),
|
||||
.in ({is_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
|
||||
.out ({is_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
|
||||
);
|
||||
|
@ -512,7 +512,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (1'b0),
|
||||
.flush (0),
|
||||
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
||||
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
|
||||
);
|
||||
|
@ -765,4 +765,4 @@ module VX_bank #(
|
|||
`SCOPE_ASSIGN(scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID));
|
||||
`SCOPE_ASSIGN(scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
||||
endmodule : VX_bank
|
||||
endmodule
|
||||
|
|
4
hw/rtl/cache/VX_cache.v
vendored
4
hw/rtl/cache/VX_cache.v
vendored
|
@ -130,8 +130,8 @@ module VX_cache #(
|
|||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire[1:0] debug_core_req_wb;
|
||||
wire[4:0] debug_core_req_rd;
|
||||
wire[`WB_BITS-1:0] debug_core_req_wb;
|
||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
||||
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
|
58
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
58
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -23,9 +23,9 @@ module VX_cache_core_rsp_merge #(
|
|||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
|
||||
|
||||
// Core Writeback
|
||||
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
|
||||
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output wire [NUM_REQUESTS-1:0] core_rsp_valid,
|
||||
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready
|
||||
);
|
||||
|
||||
|
@ -41,47 +41,63 @@ module VX_cache_core_rsp_merge #(
|
|||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual;
|
||||
reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual;
|
||||
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_BANKS-1:0] core_rsp_bank_select;
|
||||
|
||||
assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
|
||||
wire stall = ~core_rsp_ready;
|
||||
|
||||
integer i;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign core_rsp_tag = per_bank_core_rsp_tag[main_bank_index];
|
||||
always @(*) begin
|
||||
core_rsp_valid = 0;
|
||||
core_rsp_data = 0;
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 0;
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
per_bank_core_rsp_pop_unqual[i] = 1;
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_bank_select[i] = 1;
|
||||
end else begin
|
||||
per_bank_core_rsp_pop_unqual[i] = 0;
|
||||
core_rsp_bank_select[i] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
core_rsp_valid = 0;
|
||||
core_rsp_data = 0;
|
||||
core_rsp_tag = 0;
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 0;
|
||||
core_rsp_tag_unqual = 0;
|
||||
for (i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
|
||||
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& ((main_bank_index == `BANK_BITS'(i))
|
||||
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
|
||||
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
per_bank_core_rsp_pop_unqual[i] = 1;
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
core_rsp_bank_select[i] = 1;
|
||||
end else begin
|
||||
per_bank_core_rsp_pop_unqual[i] = 0;
|
||||
core_rsp_bank_select[i] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_register #(
|
||||
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH))
|
||||
) core_wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
|
||||
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
|
||||
);
|
||||
|
||||
assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}};
|
||||
|
||||
endmodule
|
||||
|
|
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -59,7 +59,7 @@ module VX_snp_forwarder #(
|
|||
assign sfq_push = snp_req_valid && !sfq_full && fwdout_ready;
|
||||
assign sfq_pop = snp_rsp_valid;
|
||||
|
||||
VX_indexable_queue #(
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
|
||||
.SIZE (SNRQ_SIZE)
|
||||
) snp_fwd_queue (
|
||||
|
|
4
hw/rtl/cache/VX_tag_data_access.v
vendored
4
hw/rtl/cache/VX_tag_data_access.v
vendored
|
@ -116,7 +116,7 @@ module VX_tag_data_access #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.flush (0),
|
||||
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}),
|
||||
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
|
||||
);
|
||||
|
@ -129,7 +129,7 @@ module VX_tag_data_access #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.flush (0),
|
||||
.in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
|
||||
.out ({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
|
||||
);
|
||||
|
|
24
hw/rtl/interfaces/VX_alu_req_if.v
Normal file
24
hw/rtl/interfaces/VX_alu_req_if.v
Normal file
|
@ -0,0 +1,24 @@
|
|||
`ifndef VX_ALU_REQ_IF
|
||||
`define VX_ALU_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a request carrying an ALU operation.
//   valid      : one bit per thread (`NUM_THREADS wide).
//   warp_num   : warp identifier (`NW_BITS wide).
//   curr_PC    : 32-bit program counter accompanying the request.
//   alu_op     : operation selector (`ALU_BITS wide).
//   rs1_data / rs2_data : one 32-bit operand word per thread.
//   rd / wb    : destination register index (`NR_BITS) and write-back field (`WB_BITS).
//   ready      : single-bit handshake wire; presumably back-pressure from the
//                consumer -- confirm against the module that drives it.
interface VX_alu_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,40 +0,0 @@
|
|||
`ifndef VX_FrE_to_BCKBE_REQ_IF
|
||||
`define VX_FrE_to_BCKBE_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_backend_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [11:0] csr_addr;
|
||||
wire is_csr;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
wire [4:0] rd;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire [4:0] alu_op;
|
||||
wire [1:0] wb;
|
||||
wire rs2_src;
|
||||
wire [31:0] itype_immed;
|
||||
wire [`BYTE_EN_BITS-1:0] mem_read;
|
||||
wire [`BYTE_EN_BITS-1:0] mem_write;
|
||||
wire [2:0] branch_type;
|
||||
wire [19:0] upper_immed;
|
||||
wire is_etype;
|
||||
wire is_jal;
|
||||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
// GPGPU stuff
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
wire is_barrier;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,15 +0,0 @@
|
|||
`ifndef VX_BRANCH_RSP_IF
|
||||
`define VX_BRANCH_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_branch_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire dir;
|
||||
wire [31:0] dest;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -5,11 +5,11 @@
|
|||
|
||||
interface VX_csr_io_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire rw;
|
||||
wire [11:0] addr;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [`CSR_ADDR_SIZE-1:0] addr;
|
||||
wire rw;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -6,17 +6,20 @@
|
|||
interface VX_csr_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [4:0] alu_op;
|
||||
wire is_csr;
|
||||
wire [11:0] csr_addr;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire is_io;
|
||||
wire [`CSR_BITS-1:0] csr_op;
|
||||
|
||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire is_io;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
|
|
33
hw/rtl/interfaces/VX_decode_if.v
Normal file
33
hw/rtl/interfaces/VX_decode_if.v
Normal file
|
@ -0,0 +1,33 @@
|
|||
`ifndef VX_DECODE_IF
|
||||
`define VX_DECODE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle describing one decoded instruction.
//   valid               : one bit per thread (`NUM_THREADS wide).
//   warp_num            : warp identifier (`NW_BITS wide).
//   curr_PC / next_PC   : 32-bit program counter values.
//   ex_type / instr_op  : `EX_BITS- and `OP_BITS-wide selector fields
//                         (presumably execution-unit class and operation within
//                         it -- confirm against the decoder).
//   rd / rs1 / rs2      : register indices (`NR_BITS wide each).
//   imm                 : 32-bit immediate.
//   rs1_is_PC / rs2_is_imm, use_rs1 / use_rs2 : single-bit operand flags.
//   wb                  : write-back field (`WB_BITS wide).
//   ready               : single-bit handshake wire; direction is not visible
//                         here -- confirm against the users of this interface.
interface VX_decode_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] instr_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [31:0] imm;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire use_rs1;
|
||||
wire use_rs2;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,47 +0,0 @@
|
|||
`ifndef VX_EXE_UNIT_REQ_IF
|
||||
`define VX_EXE_UNIT_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_exec_unit_req_if ();
|
||||
|
||||
// Meta
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
// Write Back Info
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
|
||||
// Data and alu op
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
wire [4:0] alu_op;
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire rs2_src;
|
||||
wire [31:0] itype_immed;
|
||||
wire [19:0] upper_immed;
|
||||
|
||||
// Branch type
|
||||
wire [2:0] branch_type;
|
||||
|
||||
// Jal info
|
||||
wire is_jal;
|
||||
wire jal;
|
||||
wire [31:0] jal_offset;
|
||||
|
||||
wire is_etype;
|
||||
wire wspawn;
|
||||
|
||||
// CSR info
|
||||
wire is_csr;
|
||||
wire [11:0] csr_addr;
|
||||
wire csr_immed;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
33
hw/rtl/interfaces/VX_execute_if.v
Normal file
33
hw/rtl/interfaces/VX_execute_if.v
Normal file
|
@ -0,0 +1,33 @@
|
|||
`ifndef VX_EXECUTE_IF
|
||||
`define VX_EXECUTE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle carrying an instruction together with six separate ready wires.
//   valid               : one bit per thread (`NUM_THREADS wide).
//   warp_num            : warp identifier (`NW_BITS wide).
//   curr_PC / next_PC   : 32-bit program counter values.
//   ex_type / instr_op  : `EX_BITS- and `OP_BITS-wide selector fields.
//   rd / rs1 / rs2      : register indices (`NR_BITS wide each).
//   imm                 : 32-bit immediate.
//   rs1_is_PC / rs2_is_imm : single-bit operand flags.
//   wb                  : write-back field (`WB_BITS wide).
//   alu_ready, br_ready, mul_ready, lsu_ready, csr_ready, gpu_ready :
//       one single-bit ready wire per named unit; presumably each unit reports
//       whether it can accept the instruction -- confirm against the modules
//       that drive them.
interface VX_execute_if();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] instr_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [31:0] imm;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire alu_ready;
|
||||
wire br_ready;
|
||||
wire mul_ready;
|
||||
wire lsu_ready;
|
||||
wire csr_ready;
|
||||
wire gpu_ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,19 +0,0 @@
|
|||
`ifndef VX_GPR_READ_IF
|
||||
`define VX_GPR_READ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_gpr_read_if ();
|
||||
|
||||
wire [4:0] rs1;
|
||||
wire [4:0] rs2;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire is_jal;
|
||||
wire[31:0] curr_PC;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,23 +0,0 @@
|
|||
`ifndef VX_GPGPU_INST_REQ_IF
|
||||
`define VX_GPGPU_INST_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_gpu_inst_req_if();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
|
||||
wire is_barrier;
|
||||
|
||||
wire[31:0] next_PC;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire [31:0] rd2;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
21
hw/rtl/interfaces/VX_gpu_req_if.v
Normal file
21
hw/rtl/interfaces/VX_gpu_req_if.v
Normal file
|
@ -0,0 +1,21 @@
|
|||
`ifndef VX_GPU_REQ_IF
|
||||
`define VX_GPU_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a request carrying a GPU operation (gpu_op).
//   valid     : one bit per thread (`NUM_THREADS wide).
//   warp_num  : warp identifier (`NW_BITS wide).
//   next_PC   : 32-bit program counter value.
//   gpu_op    : operation selector (`GPU_BITS wide).
//   rs1_data  : one 32-bit operand word per thread.
//   rs2_data  : a single 32-bit word (not replicated per thread, unlike rs1_data).
//   ready     : single-bit handshake wire; direction is not visible here --
//               confirm against the users of this interface.
interface VX_gpu_req_if();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] next_PC;
|
||||
|
||||
wire [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,14 +1,14 @@
|
|||
`ifndef VX_INST_META_IF
|
||||
`define VX_INST_META_IF
|
||||
`ifndef VX_IFETCH_REQ_IF
|
||||
`define VX_IFETCH_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_inst_meta_if ();
|
||||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] instruction;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
16
hw/rtl/interfaces/VX_ifetch_rsp_if.v
Normal file
16
hw/rtl/interfaces/VX_ifetch_rsp_if.v
Normal file
|
@ -0,0 +1,16 @@
|
|||
`ifndef VX_IFETCH_RSP_IF
|
||||
`define VX_IFETCH_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for an instruction-fetch response.
//   valid     : one bit per thread (`NUM_THREADS wide).
//   curr_PC   : 32-bit program counter value.
//   warp_num  : warp identifier (`NW_BITS wide).
//   instr     : 32-bit instruction word.
//   ready     : single-bit handshake wire; direction is not visible here --
//               confirm against the users of this interface.
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] instr;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,15 +0,0 @@
|
|||
|
||||
`ifndef VX_JAL_RSP_IF
|
||||
`define VX_JAL_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_jal_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [31:0] dest;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
`ifndef VX_JOIN_IF
|
||||
`define VX_JOIN_IF
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
`ifndef VX_LSU_REQ_IF
|
||||
`define VX_LSU_REQ_IF
|
||||
|
||||
|
@ -10,12 +9,13 @@ interface VX_lsu_req_if ();
|
|||
wire [31:0] curr_PC;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr; // A reg data
|
||||
wire [31:0] offset; // itype_immed
|
||||
wire [`BYTE_EN_BITS-1:0] mem_read;
|
||||
wire [`BYTE_EN_BITS-1:0] mem_write;
|
||||
wire [4:0] rd; // dest register
|
||||
wire [1:0] wb; //
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
wire rw;
|
||||
wire [`BYTEEN_BITS-1:0] byteen;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
24
hw/rtl/interfaces/VX_mul_req_if.v
Normal file
24
hw/rtl/interfaces/VX_mul_req_if.v
Normal file
|
@ -0,0 +1,24 @@
|
|||
`ifndef VX_MUL_REQ_IF
|
||||
`define VX_MUL_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a request carrying a multiply-unit operation (mul_op).
//   valid      : one bit per thread (`NUM_THREADS wide).
//   warp_num   : warp identifier (`NW_BITS wide).
//   curr_PC    : 32-bit program counter accompanying the request.
//   rs1_data / rs2_data : one 32-bit operand word per thread.
//   mul_op     : operation selector (`MUL_BITS wide).
//   rd / wb    : destination register index (`NR_BITS) and write-back field (`WB_BITS).
//   ready      : single-bit handshake wire; direction is not visible here --
//                confirm against the users of this interface.
interface VX_mul_req_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
`ifndef VX_WARP_CTL_IF
|
||||
`define VX_WARP_CTL_IF
|
||||
|
||||
|
@ -7,6 +6,7 @@
|
|||
interface VX_warp_ctl_if ();
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
wire change_mask;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
|
||||
|
@ -16,16 +16,13 @@ interface VX_warp_ctl_if ();
|
|||
|
||||
wire whalt;
|
||||
|
||||
// barrier
|
||||
wire is_barrier;
|
||||
wire [31:0] barrier_id;
|
||||
wire [$clog2(`NUM_WARPS):0] num_warps;
|
||||
wire [`NB_BITS-1:0] barrier_id;
|
||||
wire [`NW_BITS:0] num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire dont_split;
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NW_BITS-1:0] split_warp_num;
|
||||
`IGNORE_WARNINGS_END
|
||||
wire do_split;
|
||||
|
||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
||||
wire [31:0] split_save_pc;
|
||||
|
|
|
@ -6,12 +6,13 @@
|
|||
interface VX_wb_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [4:0] rd;
|
||||
wire [1:0] wb;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire is_io;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
module VX_countones #(
|
||||
parameter N = 10
|
||||
) (
|
||||
input wire[N-1:0] valids,
|
||||
output reg[$clog2(N):0] count
|
||||
input wire [N-1:0] valids,
|
||||
output reg [$clog2(N):0] count
|
||||
);
|
||||
|
||||
integer i;
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
module VX_divide #(
|
||||
parameter WIDTHN = 1,
|
||||
parameter WIDTHD = 1,
|
||||
parameter WIDTHQ = 1,
|
||||
parameter WIDTHR = 1,
|
||||
parameter NSIGNED = 0,
|
||||
parameter DSIGNED = 0,
|
||||
parameter PIPELINE = 0
|
||||
|
@ -13,18 +15,21 @@ module VX_divide #(
|
|||
input wire [WIDTHN-1:0] numer,
|
||||
input wire [WIDTHD-1:0] denom,
|
||||
|
||||
output wire [WIDTHN-1:0] quotient,
|
||||
output wire [WIDTHD-1:0] remainder
|
||||
output wire [WIDTHQ-1:0] quotient,
|
||||
output wire [WIDTHR-1:0] remainder
|
||||
);
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
wire [WIDTHN-1:0] quotient_unqual;
|
||||
wire [WIDTHD-1:0] remainder_unqual;
|
||||
|
||||
lpm_divide quartus_div (
|
||||
.clock (clk),
|
||||
.numer (numer),
|
||||
.denom (denom),
|
||||
.quotient (quotient),
|
||||
.remain (remainder),
|
||||
.quotient (quotient_unqual),
|
||||
.remain (remainder_unqual),
|
||||
.aclr (1'b0),
|
||||
.clken (1'b1)
|
||||
);
|
||||
|
@ -38,6 +43,9 @@ module VX_divide #(
|
|||
quartus_div.lpm_hint = "MAXIMIZE_SPEED=6,LPM_REMAINDERPOSITIVE=FALSE",
|
||||
quartus_div.lpm_pipeline = PIPELINE;
|
||||
|
||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
||||
|
||||
`else
|
||||
|
||||
reg [WIDTHN-1:0] quotient_unqual;
|
||||
|
@ -47,7 +55,7 @@ module VX_divide #(
|
|||
`ifndef SYNTHESIS
|
||||
// this edge case kills verilator in some cases by causing a division
|
||||
// overflow exception. INT_MIN / -1 (on x86)
|
||||
if (numer == {1'b1, (WIDTHN-1)'(0)}
|
||||
if (numer == {1'b1, (WIDTHN-1)'(1'b0)}
|
||||
&& denom == {WIDTHD{1'b1}}) begin
|
||||
quotient_unqual = 0;
|
||||
remainder_unqual = 0;
|
||||
|
@ -74,8 +82,8 @@ module VX_divide #(
|
|||
end
|
||||
|
||||
if (PIPELINE == 0) begin
|
||||
assign quotient = quotient_unqual;
|
||||
assign remainder = remainder_unqual;
|
||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
||||
end else begin
|
||||
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
||||
|
@ -99,8 +107,8 @@ module VX_divide #(
|
|||
end
|
||||
end
|
||||
|
||||
assign quotient = quotient_pipe[PIPELINE-1];
|
||||
assign remainder = remainder_pipe[PIPELINE-1];
|
||||
assign quotient = quotient_pipe[PIPELINE-1][WIDTHQ-1:0];
|
||||
assign remainder = remainder_pipe[PIPELINE-1][WIDTHR-1:0];
|
||||
end
|
||||
|
||||
`endif
|
||||
|
|
|
@ -11,18 +11,25 @@ module VX_generic_register #(
|
|||
input wire[N-1:0] in,
|
||||
output wire[N-1:0] out
|
||||
);
|
||||
reg [(N-1):0] value;
|
||||
if (PASSTHRU) begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
assign out = flush ? N'(0) : in;
|
||||
end else begin
|
||||
reg [(N-1):0] value;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value <= 0;
|
||||
end else if (flush) begin
|
||||
value <= 0;
|
||||
end else if (~stall) begin
|
||||
value <= in;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
value <= N'(0);
|
||||
end else if (~stall) begin
|
||||
value <= in;
|
||||
end else if (flush) begin
|
||||
value <= N'(0);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign out = PASSTHRU ? in : value;
|
||||
assign out = value;
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_indexable_queue #(
|
||||
module VX_index_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1
|
||||
) (
|
|
@ -56,7 +56,7 @@ module VX_matrix_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_encoder_onehot #(
|
||||
VX_onehot_encoder #(
|
||||
.N(N)
|
||||
) encoder (
|
||||
.onehot (grant_onehot),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_encoder_onehot #(
|
||||
module VX_onehot_encoder #(
|
||||
parameter N = 6
|
||||
) (
|
||||
input wire [N-1:0] onehot,
|
|
@ -1,6 +1,8 @@
|
|||
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
|
@ -12,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate
|
||||
|
@ -34,13 +36,13 @@ DBG += -DDBG_CORE_REQ_INFO
|
|||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
gen-s:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v -CFLAGS '$(CF) -DNDEBUG'
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -cc Vortex.v -CFLAGS '$(CF) -g -O0 $(DBG)' --trace $(DBG)
|
||||
verilator $(VF) -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
|
||||
gen-m:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
|
|
@ -13,6 +13,9 @@ Simulator::Simulator() {
|
|||
// force random values for uninitialized signals
|
||||
Verilated::randReset(2);
|
||||
|
||||
// Turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
ram_ = nullptr;
|
||||
vortex_ = new VVortex();
|
||||
|
||||
|
@ -49,6 +52,9 @@ void Simulator::reset() {
|
|||
vortex_->reset = 0;
|
||||
|
||||
dram_rsp_vec_.clear();
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
void Simulator::step() {
|
||||
|
@ -60,6 +66,7 @@ void Simulator::step() {
|
|||
|
||||
this->eval_dram_bus();
|
||||
this->eval_io_bus();
|
||||
this->eval_csr_bus();
|
||||
this->eval_snp_bus();
|
||||
}
|
||||
|
||||
|
@ -157,6 +164,15 @@ void Simulator::eval_io_bus() {
|
|||
vortex_->io_rsp_valid = 0;
|
||||
}
|
||||
|
||||
// Drives the CSR I/O port of the simulated design.
// Every csr_io_req_* input (valid, coreid, addr, rw, data) is written to 0 and
// csr_io_rsp_ready is written to 1; no other state is read or modified here.
// With csr_io_req_valid held at 0 the simulator presumably never issues a CSR
// request, and the asserted rsp_ready presumably lets any response drain --
// confirm against the RTL's handshake.
void Simulator::eval_csr_bus() {
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
vortex_->csr_io_req_coreid = 0;
|
||||
vortex_->csr_io_req_addr = 0;
|
||||
vortex_->csr_io_req_rw = 0;
|
||||
vortex_->csr_io_req_data = 0;
|
||||
vortex_->csr_io_rsp_ready = 1;
|
||||
}
|
||||
|
||||
void Simulator::eval_snp_bus() {
|
||||
if (snp_req_active_) {
|
||||
if (vortex_->snp_rsp_valid) {
|
||||
|
@ -241,19 +257,18 @@ bool Simulator::run() {
|
|||
|
||||
// check riscv-tests PASSED/FAILED status
|
||||
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
#if (NUM_CLUSTERS == 1)
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->writeback->last_data_wb & 0xf;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return (status == 1);
|
||||
}
|
||||
|
||||
|
||||
void Simulator::load_bin(const char* program_file) {
|
||||
if (ram_ == nullptr)
|
||||
return;
|
||||
|
|
|
@ -52,6 +52,7 @@ private:
|
|||
|
||||
void eval_dram_bus();
|
||||
void eval_io_bus();
|
||||
void eval_csr_bus();
|
||||
void eval_snp_bus();
|
||||
|
||||
std::vector<dram_req_t> dram_rsp_vec_;
|
||||
|
|
|
@ -5,12 +5,12 @@
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
//#define ALL_TESTS
|
||||
#define ALL_TESTS
|
||||
#ifdef ALL_TESTS
|
||||
bool passed = true;
|
||||
|
||||
std::string tests[] = {
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-and.hex",
|
||||
"../../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
|
||||
|
@ -67,12 +67,14 @@ int main(int argc, char **argv)
|
|||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test.c_str());
|
||||
bool curr = simulator.run();
|
||||
bool status = simulator.run();
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
std::cerr << DEFAULT;
|
||||
passed = passed && curr;
|
||||
passed = passed && status;
|
||||
if (!passed)
|
||||
break;
|
||||
}
|
||||
|
||||
std::cerr << DEFAULT << "\n***************************************\n";
|
||||
|
@ -95,12 +97,12 @@ int main(int argc, char **argv)
|
|||
Simulator simulator;
|
||||
simulator.attach_ram(&ram);
|
||||
simulator.load_ihex(test);
|
||||
bool curr = simulator.run();
|
||||
bool status = simulator.run();
|
||||
|
||||
if (curr) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
|
||||
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
|
||||
|
||||
return !curr;
|
||||
return !status;
|
||||
|
||||
#endif
|
||||
}
|
|
@ -2,13 +2,13 @@
|
|||
|
||||
module VX_tb_divide();
|
||||
|
||||
`ifdef TRACE
|
||||
`ifdef TRACE
|
||||
initial
|
||||
begin
|
||||
$dumpfile("trace.vcd");
|
||||
$dumpvars(0,test);
|
||||
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
reg clk;
|
||||
reg rst;
|
||||
|
@ -23,6 +23,8 @@ module VX_tb_divide();
|
|||
VX_divide#(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.WIDTHQ(32),
|
||||
.WIDTHR(32),
|
||||
.PIPELINE(i)
|
||||
) div(
|
||||
.clock(clk),
|
||||
|
@ -157,4 +159,4 @@ module VX_tb_divide();
|
|||
always #1
|
||||
clk = !clk;
|
||||
|
||||
endmodule: VX_tb_divide
|
||||
endmodule
|
Loading…
Add table
Add a link
Reference in a new issue