mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
ibuffer redesign to reduce critical path
This commit is contained in:
parent
3e645cee32
commit
964046dc31
9 changed files with 200 additions and 445 deletions
|
@ -5,7 +5,8 @@ module VX_dispatch (
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if,
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_gpr_stage_if.slave gpr_stage_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_exe_if.master alu_exe_if,
|
||||
|
@ -32,17 +33,17 @@ module VX_dispatch (
|
|||
.N (`NUM_THREADS),
|
||||
.REVERSE (1)
|
||||
) tid_select (
|
||||
.data_in (dispatch_if.tmask),
|
||||
.data_in (ibuffer_if.tmask),
|
||||
.data_out (tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`XLEN-1:0] next_PC = dispatch_if.PC + 4;
|
||||
wire [`XLEN-1:0] next_PC = ibuffer_if.PC + 4;
|
||||
|
||||
// ALU unit
|
||||
|
||||
wire alu_req_valid = dispatch_if.valid && (dispatch_if.ex_type == `EX_ALU);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(dispatch_if.op_type);
|
||||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `XLEN + `INST_ALU_BITS + `INST_MOD_BITS + `XLEN + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * `XLEN)),
|
||||
|
@ -52,16 +53,16 @@ module VX_dispatch (
|
|||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, alu_op_type, dispatch_if.op_mod, dispatch_if.imm, dispatch_if.use_PC, dispatch_if.use_imm, dispatch_if.rd, dispatch_if.wb, tid, dispatch_if.rs1_data, dispatch_if.rs2_data}),
|
||||
.data_out ({alu_exe_if.uuid, alu_exe_if.wid, alu_exe_if.tmask, alu_exe_if.PC, alu_exe_if.next_PC, alu_exe_if.op_type, alu_exe_if.op_mod, alu_exe_if.imm, alu_exe_if.use_PC, alu_exe_if.use_imm, alu_exe_if.rd, alu_exe_if.wb, alu_exe_if.tid, alu_exe_if.rs1_data, alu_exe_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_stage_if.rs1_data, gpr_stage_if.rs2_data}),
|
||||
.data_out ({alu_exe_if.uuid, alu_exe_if.wid, alu_exe_if.tmask, alu_exe_if.PC, alu_exe_if.next_PC, alu_exe_if.op_type, alu_exe_if.op_mod, alu_exe_if.imm, alu_exe_if.use_PC, alu_exe_if.use_imm, alu_exe_if.rd, alu_exe_if.wb, alu_exe_if.tid, alu_exe_if.rs1_data, alu_exe_if.rs2_data}),
|
||||
.valid_out (alu_exe_if.valid),
|
||||
.ready_out (alu_exe_if.ready)
|
||||
);
|
||||
|
||||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = dispatch_if.valid && (dispatch_if.ex_type == `EX_LSU);
|
||||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(dispatch_if.op_type);
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_LSU_BITS + `XLEN + `NR_BITS + 1 + `NUM_THREADS*`XLEN + `NUM_THREADS*`XLEN),
|
||||
|
@ -71,22 +72,22 @@ module VX_dispatch (
|
|||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, dispatch_if.imm, dispatch_if.rd, dispatch_if.wb, dispatch_if.rs1_data, dispatch_if.rs2_data}),
|
||||
.data_out ({lsu_exe_if.uuid, lsu_exe_if.wid, lsu_exe_if.tmask, lsu_exe_if.PC, lsu_exe_if.op_type, lsu_exe_if.offset, lsu_exe_if.rd, lsu_exe_if.wb, lsu_exe_if.base_addr, lsu_exe_if.store_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_stage_if.rs1_data, gpr_stage_if.rs2_data}),
|
||||
.data_out ({lsu_exe_if.uuid, lsu_exe_if.wid, lsu_exe_if.tmask, lsu_exe_if.PC, lsu_exe_if.op_type, lsu_exe_if.offset, lsu_exe_if.rd, lsu_exe_if.wb, lsu_exe_if.base_addr, lsu_exe_if.store_data}),
|
||||
.valid_out (lsu_exe_if.valid),
|
||||
.ready_out (lsu_exe_if.ready)
|
||||
);
|
||||
|
||||
// csr unit
|
||||
|
||||
wire csr_req_valid = dispatch_if.valid && (dispatch_if.ex_type == `EX_CSR);
|
||||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(dispatch_if.op_type);
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = dispatch_if.imm[`VX_CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(ibuffer_if.op_type);
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = ibuffer_if.imm[`VX_CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = ibuffer_if.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_data;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign csr_data[i] = dispatch_if.rs1_data[i][31:0];
|
||||
assign csr_data[i] = gpr_stage_if.rs1_data[i][31:0];
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
|
@ -97,8 +98,8 @@ module VX_dispatch (
|
|||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, csr_op_type, csr_addr, dispatch_if.rd, dispatch_if.wb, dispatch_if.use_imm, csr_imm, tid, csr_data}),
|
||||
.data_out ({csr_exe_if.uuid, csr_exe_if.wid, csr_exe_if.tmask, csr_exe_if.PC, csr_exe_if.op_type, csr_exe_if.addr, csr_exe_if.rd, csr_exe_if.wb, csr_exe_if.use_imm, csr_exe_if.imm, csr_exe_if.tid, csr_exe_if.rs1_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, tid, csr_data}),
|
||||
.data_out ({csr_exe_if.uuid, csr_exe_if.wid, csr_exe_if.tmask, csr_exe_if.PC, csr_exe_if.op_type, csr_exe_if.addr, csr_exe_if.rd, csr_exe_if.wb, csr_exe_if.use_imm, csr_exe_if.imm, csr_exe_if.tid, csr_exe_if.rs1_data}),
|
||||
.valid_out (csr_exe_if.valid),
|
||||
.ready_out (csr_exe_if.ready)
|
||||
);
|
||||
|
@ -106,10 +107,10 @@ module VX_dispatch (
|
|||
// fpu unit
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = dispatch_if.valid && (dispatch_if.ex_type == `EX_FPU);
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(dispatch_if.op_type);
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = dispatch_if.imm[`INST_FMT_BITS-1:0];
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = dispatch_if.op_mod[`INST_FRM_BITS-1:0];
|
||||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = ibuffer_if.imm[`INST_FMT_BITS-1:0];
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = ibuffer_if.op_mod[`INST_FRM_BITS-1:0];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + `NR_BITS + (3 * `NUM_THREADS * `XLEN)),
|
||||
|
@ -119,19 +120,19 @@ module VX_dispatch (
|
|||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, fpu_op_type, fpu_fmt, fpu_frm, dispatch_if.rd, dispatch_if.rs1_data, dispatch_if.rs2_data, dispatch_if.rs3_data}),
|
||||
.data_out ({fpu_exe_if.uuid, fpu_exe_if.wid, fpu_exe_if.tmask, fpu_exe_if.PC, fpu_exe_if.op_type, fpu_exe_if.fmt, fpu_exe_if.frm, fpu_exe_if.rd, fpu_exe_if.rs1_data, fpu_exe_if.rs2_data, fpu_exe_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, fpu_fmt, fpu_frm, ibuffer_if.rd, gpr_stage_if.rs1_data, gpr_stage_if.rs2_data, gpr_stage_if.rs3_data}),
|
||||
.data_out ({fpu_exe_if.uuid, fpu_exe_if.wid, fpu_exe_if.tmask, fpu_exe_if.PC, fpu_exe_if.op_type, fpu_exe_if.fmt, fpu_exe_if.frm, fpu_exe_if.rd, fpu_exe_if.rs1_data, fpu_exe_if.rs2_data, fpu_exe_if.rs3_data}),
|
||||
.valid_out (fpu_exe_if.valid),
|
||||
.ready_out (fpu_exe_if.ready)
|
||||
);
|
||||
`else
|
||||
`UNUSED_VAR (dispatch_if.rs3_data)
|
||||
`UNUSED_VAR (gpr_stage_if.rs3_data)
|
||||
`endif
|
||||
|
||||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = dispatch_if.valid && (dispatch_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(dispatch_if.op_type);
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `XLEN + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `UP(`NT_BITS) + (3 * `NUM_THREADS * `XLEN)),
|
||||
|
@ -141,8 +142,8 @@ module VX_dispatch (
|
|||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, gpu_op_type, dispatch_if.op_mod, dispatch_if.rd, dispatch_if.wb, tid, dispatch_if.rs1_data, dispatch_if.rs2_data, dispatch_if.rs3_data}),
|
||||
.data_out ({gpu_exe_if.uuid, gpu_exe_if.wid, gpu_exe_if.tmask, gpu_exe_if.PC, gpu_exe_if.next_PC, gpu_exe_if.op_type, gpu_exe_if.op_mod, gpu_exe_if.rd, gpu_exe_if.wb, gpu_exe_if.tid, gpu_exe_if.rs1_data, gpu_exe_if.rs2_data, gpu_exe_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_stage_if.rs1_data, gpr_stage_if.rs2_data, gpr_stage_if.rs3_data}),
|
||||
.data_out ({gpu_exe_if.uuid, gpu_exe_if.wid, gpu_exe_if.tmask, gpu_exe_if.PC, gpu_exe_if.next_PC, gpu_exe_if.op_type, gpu_exe_if.op_mod, gpu_exe_if.rd, gpu_exe_if.wb, gpu_exe_if.tid, gpu_exe_if.rs1_data, gpu_exe_if.rs2_data, gpu_exe_if.rs3_data}),
|
||||
.valid_out (gpu_exe_if.valid),
|
||||
.ready_out (gpu_exe_if.ready)
|
||||
);
|
||||
|
@ -150,7 +151,7 @@ module VX_dispatch (
|
|||
// can take next request?
|
||||
reg ready_r;
|
||||
always @(*) begin
|
||||
case (dispatch_if.ex_type)
|
||||
case (ibuffer_if.ex_type)
|
||||
`EX_LSU: ready_r = lsu_req_ready;
|
||||
`EX_CSR: ready_r = csr_req_ready;
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -161,6 +162,6 @@ module VX_dispatch (
|
|||
default: ready_r = alu_req_ready;
|
||||
endcase
|
||||
end
|
||||
assign dispatch_if.ready = ready_r;
|
||||
assign ibuffer_if.ready = ready_r;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -7,6 +7,7 @@ module VX_gpr_stage #(
|
|||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_ibuffer_if.gpr ibuffer_if,
|
||||
VX_gpr_stage_if.slave gpr_stage_if
|
||||
);
|
||||
|
||||
|
@ -27,14 +28,14 @@ module VX_gpr_stage #(
|
|||
wire [RAM_ADDRW-1:0] waddr, raddr1, raddr2;
|
||||
if (`NUM_WARPS > 1) begin
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_stage_if.wid, gpr_stage_if.rs1};
|
||||
assign raddr2 = {gpr_stage_if.wid, gpr_stage_if.rs2};
|
||||
assign raddr1 = {ibuffer_if.wid, ibuffer_if.rs1};
|
||||
assign raddr2 = {ibuffer_if.wid, ibuffer_if.rs2};
|
||||
end else begin
|
||||
`UNUSED_VAR (writeback_if.wid)
|
||||
`UNUSED_VAR (gpr_stage_if.wid)
|
||||
`UNUSED_VAR (ibuffer_if.wid)
|
||||
assign waddr = writeback_if.rd;
|
||||
assign raddr1 = gpr_stage_if.rs1;
|
||||
assign raddr2 = gpr_stage_if.rs2;
|
||||
assign raddr1 = ibuffer_if.rs1;
|
||||
assign raddr2 = ibuffer_if.rs2;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
@ -72,9 +73,9 @@ module VX_gpr_stage #(
|
|||
`ifdef EXT_F_ENABLE
|
||||
wire [RAM_ADDRW-1:0] raddr3;
|
||||
if (`NUM_WARPS > 1) begin
|
||||
assign raddr3 = {gpr_stage_if.wid, gpr_stage_if.rs3};
|
||||
assign raddr3 = {ibuffer_if.wid, ibuffer_if.rs3};
|
||||
end else begin
|
||||
assign raddr3 = gpr_stage_if.rs3;
|
||||
assign raddr3 = ibuffer_if.rs3;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
@ -94,7 +95,7 @@ module VX_gpr_stage #(
|
|||
);
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (gpr_stage_if.rs3)
|
||||
`UNUSED_VAR (ibuffer_if.rs3)
|
||||
assign gpr_stage_if.rs3_data = '0;
|
||||
`endif
|
||||
|
||||
|
|
|
@ -10,179 +10,19 @@ module VX_ibuffer #(
|
|||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_scoreboard_if.master scoreboard_if,
|
||||
VX_ibuffer_if.master ibuffer_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam SIZE = (`IBUF_SIZE + 1);
|
||||
localparam ALM_FULL = SIZE - 1;
|
||||
localparam ALM_EMPTY = 1;
|
||||
|
||||
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
|
||||
`STATIC_ASSERT ((`IBUF_SIZE > 1), ("invalid parameter"))
|
||||
|
||||
wire [`NUM_WARPS-1:0] q_full, q_empty, q_alm_full, q_alm_empty;
|
||||
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
|
||||
wire [DATAW-1:0] q_data_in;
|
||||
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
|
||||
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
|
||||
|
||||
wire enq_fire = decode_if.valid && decode_if.ready;
|
||||
wire deq_fire = ibuffer_if.valid && ibuffer_if.ready;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
reg [ADDRW-1:0] used_r;
|
||||
reg full_r, empty_r, alm_full_r, alm_empty_r;
|
||||
|
||||
wire push = enq_fire && (i == decode_if.wid);
|
||||
wire pop = deq_fire && (i == ibuffer_if.wid);
|
||||
|
||||
wire going_empty = empty_r || (alm_empty_r && pop);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE-1),
|
||||
.OUT_REG (1)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (push && !going_empty),
|
||||
.data_in (q_data_in),
|
||||
.ready_out(pop),
|
||||
.data_out (q_data_prev[i]),
|
||||
`UNUSED_PIN (ready_in),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
used_r <= '0;
|
||||
full_r <= 0;
|
||||
alm_full_r <= 0;
|
||||
empty_r <= 1;
|
||||
alm_empty_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
if (!pop) begin
|
||||
empty_r <= 0;
|
||||
if (used_r == ADDRW'(ALM_EMPTY))
|
||||
alm_empty_r <= 0;
|
||||
if (used_r == ADDRW'(SIZE-1))
|
||||
full_r <= 1;
|
||||
if (used_r == ADDRW'(ALM_FULL-1))
|
||||
alm_full_r <= 1;
|
||||
end
|
||||
end else if (pop) begin
|
||||
full_r <= 0;
|
||||
if (used_r == ADDRW'(ALM_FULL))
|
||||
alm_full_r <= 0;
|
||||
if (used_r == ADDRW'(1))
|
||||
empty_r <= 1;
|
||||
if (used_r == ADDRW'(ALM_EMPTY+1))
|
||||
alm_empty_r <= 1;
|
||||
end
|
||||
used_r <= $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop)));
|
||||
end
|
||||
|
||||
if (push && going_empty) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end else if (pop) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
end
|
||||
wire [`NUM_WARPS-1:0] q_full, q_empty;
|
||||
wire [`NUM_WARPS-1:0] deq_valid_in, deq_ready_in;
|
||||
|
||||
assign q_full[i] = full_r;
|
||||
assign q_empty[i] = empty_r;
|
||||
assign q_alm_full[i] = alm_full_r;
|
||||
assign q_alm_empty[i] = alm_empty_r;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (q_alm_full)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
|
||||
reg [NW_WIDTH-1:0] deq_wid, deq_wid_n;
|
||||
reg [NW_WIDTH-1:0] deq_wid_rr, deq_wid_rr_n;
|
||||
reg deq_valid, deq_valid_n;
|
||||
reg [DATAW-1:0] deq_instr, deq_instr_n;
|
||||
reg [NWARPSW-1:0] num_warps;
|
||||
|
||||
`UNUSED_VAR (deq_instr)
|
||||
|
||||
// calculate valid table
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table;
|
||||
if (deq_fire) begin
|
||||
valid_table_n[deq_wid] = !q_alm_empty[deq_wid];
|
||||
end
|
||||
if (enq_fire) begin
|
||||
valid_table_n[decode_if.wid] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
// round-robin warp scheduling
|
||||
VX_rr_arbiter #(
|
||||
.NUM_REQS (`NUM_WARPS)
|
||||
) rr_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_table_n),
|
||||
.grant_index (deq_wid_rr_n),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (unlock)
|
||||
);
|
||||
|
||||
// schedule the next instruction to issue
|
||||
always @(*) begin
|
||||
if (num_warps > 1) begin
|
||||
deq_valid_n = 1;
|
||||
deq_wid_n = deq_wid_rr;
|
||||
deq_instr_n = q_data_out[deq_wid_rr];
|
||||
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
|
||||
deq_valid_n = 1;
|
||||
deq_wid_n = deq_wid;
|
||||
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
|
||||
end else begin
|
||||
deq_valid_n = enq_fire;
|
||||
deq_wid_n = decode_if.wid;
|
||||
deq_instr_n = q_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
wire warp_added = enq_fire && q_empty[decode_if.wid];
|
||||
wire warp_removed = deq_fire && q_alm_empty[deq_wid] && ~(enq_fire && decode_if.wid == deq_wid);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= '0;
|
||||
deq_valid <= 0;
|
||||
num_warps <= '0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
deq_valid <= deq_valid_n;
|
||||
if (warp_added && !warp_removed) begin
|
||||
num_warps <= num_warps + NWARPSW'(1);
|
||||
end else if (warp_removed && !warp_added) begin
|
||||
num_warps <= num_warps - NWARPSW'(1);
|
||||
end
|
||||
end
|
||||
deq_wid <= deq_wid_n;
|
||||
deq_wid_rr <= deq_wid_rr_n;
|
||||
deq_instr <= deq_instr_n;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign decode_if.ibuf_pop[i] = deq_fire && (ibuffer_if.wid == NW_WIDTH'(i));
|
||||
end
|
||||
|
||||
assign decode_if.ready = ~q_full[decode_if.wid];
|
||||
|
||||
assign q_data_in = {decode_if.uuid,
|
||||
decode_if.tmask,
|
||||
decode_if.PC,
|
||||
|
@ -198,9 +38,68 @@ module VX_ibuffer #(
|
|||
decode_if.rs2,
|
||||
decode_if.rs3};
|
||||
|
||||
assign ibuffer_if.valid = deq_valid;
|
||||
assign ibuffer_if.wid = deq_wid;
|
||||
assign {ibuffer_if.uuid,
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
wire q_push = decode_if.valid && decode_if.ready && (i == decode_if.wid);
|
||||
wire q_pop = deq_valid_in[i] && deq_ready_in[i];
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATAW),
|
||||
.DEPTH (`IBUF_SIZE),
|
||||
.OUT_REG (1)
|
||||
) inst_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (q_push),
|
||||
.pop (q_pop),
|
||||
.data_in (q_data_in),
|
||||
.data_out (q_data_out[i]),
|
||||
.full (q_full[i]),
|
||||
.empty (q_empty[i]),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign decode_if.ibuf_pop[i] = q_pop;
|
||||
end
|
||||
|
||||
assign decode_if.ready = ~q_full[decode_if.wid];
|
||||
|
||||
// scoreboard access
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign scoreboard_if.valid[i] = ~q_empty[i];
|
||||
assign scoreboard_if.rd[i] = q_data_out[i][3*`NR_BITS +: `NR_BITS];
|
||||
assign scoreboard_if.rs1[i] = q_data_out[i][2*`NR_BITS +: `NR_BITS];
|
||||
assign scoreboard_if.rs2[i] = q_data_out[i][1*`NR_BITS +: `NR_BITS];
|
||||
assign scoreboard_if.rs3[i] = q_data_out[i][0*`NR_BITS +: `NR_BITS];
|
||||
end
|
||||
|
||||
// round-robin select
|
||||
|
||||
wire [`NUM_WARPS-1:0][(NW_WIDTH+DATAW)-1:0] deq_data_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign deq_valid_in[i] = scoreboard_if.valid[i] && scoreboard_if.ready[i];
|
||||
assign deq_data_in[i] = {NW_WIDTH'(i), q_data_out[i]};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_WARPS),
|
||||
.DATAW (NW_WIDTH+DATAW),
|
||||
.ARBITER ("R"),
|
||||
.LOCK_ENABLE (1),
|
||||
.BUFFERED (3)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (deq_valid_in),
|
||||
.ready_in (deq_ready_in),
|
||||
.data_in (deq_data_in),
|
||||
.data_out ({
|
||||
ibuffer_if.wid,
|
||||
ibuffer_if.uuid,
|
||||
ibuffer_if.tmask,
|
||||
ibuffer_if.PC,
|
||||
ibuffer_if.ex_type,
|
||||
|
@ -213,13 +112,9 @@ module VX_ibuffer #(
|
|||
ibuffer_if.rd,
|
||||
ibuffer_if.rs1,
|
||||
ibuffer_if.rs2,
|
||||
ibuffer_if.rs3} = deq_instr;
|
||||
|
||||
// scoreboard forwarding
|
||||
assign ibuffer_if.wid_n = deq_wid_n;
|
||||
assign ibuffer_if.rd_n = deq_instr_n[3*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs1_n = deq_instr_n[2*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs2_n = deq_instr_n[1*`NR_BITS +: `NR_BITS];
|
||||
assign ibuffer_if.rs3_n = deq_instr_n[0*`NR_BITS +: `NR_BITS];
|
||||
ibuffer_if.rs3}),
|
||||
.valid_out (ibuffer_if.valid),
|
||||
.ready_out (ibuffer_if.ready)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -27,50 +27,9 @@ module VX_issue #(
|
|||
VX_gpu_exe_if.master gpu_exe_if
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if();
|
||||
VX_gpr_stage_if gpr_stage_if();
|
||||
VX_scoreboard_if scoreboard_if();
|
||||
VX_dispatch_if dispatch_if();
|
||||
|
||||
// scoreboard interface
|
||||
assign scoreboard_if.valid = ibuffer_if.valid && dispatch_if.ready;
|
||||
assign scoreboard_if.uuid = ibuffer_if.uuid;
|
||||
assign scoreboard_if.wid = ibuffer_if.wid;
|
||||
assign scoreboard_if.tmask = ibuffer_if.tmask;
|
||||
assign scoreboard_if.PC = ibuffer_if.PC;
|
||||
assign scoreboard_if.wb = ibuffer_if.wb;
|
||||
assign scoreboard_if.rd = ibuffer_if.rd;
|
||||
assign scoreboard_if.rd_n = ibuffer_if.rd_n;
|
||||
assign scoreboard_if.rs1_n = ibuffer_if.rs1_n;
|
||||
assign scoreboard_if.rs2_n = ibuffer_if.rs2_n;
|
||||
assign scoreboard_if.rs3_n = ibuffer_if.rs3_n;
|
||||
assign scoreboard_if.wid_n = ibuffer_if.wid_n;
|
||||
|
||||
// GPR request interface
|
||||
assign gpr_stage_if.wid = ibuffer_if.wid;
|
||||
assign gpr_stage_if.rs1 = ibuffer_if.rs1;
|
||||
assign gpr_stage_if.rs2 = ibuffer_if.rs2;
|
||||
assign gpr_stage_if.rs3 = ibuffer_if.rs3;
|
||||
|
||||
// dispatch interface
|
||||
assign dispatch_if.valid = ibuffer_if.valid && scoreboard_if.ready;
|
||||
assign dispatch_if.uuid = ibuffer_if.uuid;
|
||||
assign dispatch_if.wid = ibuffer_if.wid;
|
||||
assign dispatch_if.tmask = ibuffer_if.tmask;
|
||||
assign dispatch_if.PC = ibuffer_if.PC;
|
||||
assign dispatch_if.ex_type = ibuffer_if.ex_type;
|
||||
assign dispatch_if.op_type = ibuffer_if.op_type;
|
||||
assign dispatch_if.op_mod = ibuffer_if.op_mod;
|
||||
assign dispatch_if.wb = ibuffer_if.wb;
|
||||
assign dispatch_if.use_PC = ibuffer_if.use_PC;
|
||||
assign dispatch_if.use_imm = ibuffer_if.use_imm;
|
||||
assign dispatch_if.imm = ibuffer_if.imm;
|
||||
assign dispatch_if.rd = ibuffer_if.rd;
|
||||
assign dispatch_if.rs1_data = gpr_stage_if.rs1_data;
|
||||
assign dispatch_if.rs2_data = gpr_stage_if.rs2_data;
|
||||
assign dispatch_if.rs3_data = gpr_stage_if.rs3_data;
|
||||
|
||||
// issue the instruction
|
||||
assign ibuffer_if.ready = scoreboard_if.ready && dispatch_if.ready;
|
||||
VX_gpr_stage_if gpr_stage_if();
|
||||
wire [3:0] used_regs;
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
`RESET_RELAY (scoreboard_reset, reset);
|
||||
|
@ -83,31 +42,36 @@ module VX_issue #(
|
|||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.used_regs (used_regs)
|
||||
);
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (gpr_reset),
|
||||
.clk (clk),
|
||||
.reset (gpr_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.gpr_stage_if (gpr_stage_if)
|
||||
);
|
||||
|
||||
VX_dispatch dispatch (
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
.dispatch_if(dispatch_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.gpr_stage_if (gpr_stage_if),
|
||||
.alu_exe_if (alu_exe_if),
|
||||
.lsu_exe_if (lsu_exe_if),
|
||||
.csr_exe_if (csr_exe_if),
|
||||
|
@ -128,7 +92,7 @@ module VX_issue #(
|
|||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-stall: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, cycles=%0d, inuse=%b%b%b%b, dispatch=%b (#%0d)\n",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.rd, ibuffer_if.wb, timeout_ctr,
|
||||
scoreboard_if.used_regs[0], scoreboard_if.used_regs[1], scoreboard_if.used_regs[2], scoreboard_if.used_regs[3], ~dispatch_if.ready, ibuffer_if.uuid));
|
||||
used_regs[0], used_regs[1], used_regs[2], used_regs[3], ~ibuffer_if.ready, ibuffer_if.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_if_fire) begin
|
||||
|
@ -139,14 +103,14 @@ module VX_issue #(
|
|||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT,
|
||||
("%t: *** core%0d-issue-timeout: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, inuse=%b%b%b%b, dispatch=%b (#%0d)",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.rd, ibuffer_if.wb,
|
||||
scoreboard_if.used_regs[0], scoreboard_if.used_regs[1], scoreboard_if.used_regs[2], scoreboard_if.used_regs[3], ~dispatch_if.ready, ibuffer_if.uuid));
|
||||
used_regs[0], used_regs[1], used_regs[2], used_regs[3], ~ibuffer_if.ready, ibuffer_if.uuid));
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
wire scoreboard_if_not_ready = ~scoreboard_if.ready;
|
||||
wire dispatch_if_not_ready = ~dispatch_if.ready;
|
||||
wire ibuffer_if_not_ready = ~ibuffer_if.ready;
|
||||
wire writeback_if_valid = writeback_if.valid;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
|
@ -163,7 +127,7 @@ module VX_issue #(
|
|||
reset,
|
||||
ibuffer_if_fire,
|
||||
scoreboard_if_not_ready,
|
||||
dispatch_if_not_ready,
|
||||
ibuffer_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
|
@ -180,9 +144,9 @@ module VX_issue #(
|
|||
ibuffer_if.imm,
|
||||
ibuffer_if.use_PC,
|
||||
ibuffer_if.use_imm,
|
||||
dispatch_if.rs1_data,
|
||||
dispatch_if.rs2_data,
|
||||
dispatch_if.rs3_data,
|
||||
ibuffer_if.rs1_data,
|
||||
ibuffer_if.rs2_data,
|
||||
ibuffer_if.rs3_data,
|
||||
writeback_if.uuid,
|
||||
writeback_if.tmask,
|
||||
writeback_if.rd,
|
||||
|
@ -196,7 +160,7 @@ module VX_issue #(
|
|||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({ibuffer_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.wid, ibuffer_if.ex_type, ibuffer_if.op_type, ibuffer_if.ready, ibuffer_if.valid, scoreboard_if.used_regs, scoreboard_if.ready, dispatch_if.ready, ibuffer_if.ready, ibuffer_if.valid}),
|
||||
.probe0 ({ibuffer_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.wid, ibuffer_if.ex_type, ibuffer_if.op_type, ibuffer_if.ready, ibuffer_if.valid, used_regs, scoreboard_if.ready, ibuffer_if.ready, ibuffer_if.ready, ibuffer_if.valid}),
|
||||
.probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
|
||||
);
|
||||
`endif
|
||||
|
@ -234,8 +198,8 @@ module VX_issue #(
|
|||
if (scoreboard_if.valid && ~scoreboard_if.ready) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(1);
|
||||
end
|
||||
if (dispatch_if.valid && ~dispatch_if.ready) begin
|
||||
case (dispatch_if.ex_type)
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
case (ibuffer_if.ex_type)
|
||||
`EX_ALU: perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'(1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'(1);
|
||||
|
@ -262,18 +226,18 @@ module VX_issue #(
|
|||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dispatch_if.valid && dispatch_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, dispatch_if.wid, dispatch_if.PC));
|
||||
trace_ex_type(1, dispatch_if.ex_type);
|
||||
if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC));
|
||||
trace_ex_type(1, ibuffer_if.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, dispatch_if.ex_type, dispatch_if.op_type, dispatch_if.op_mod, dispatch_if.imm);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", dispatch_if.op_mod, dispatch_if.tmask, dispatch_if.wb, dispatch_if.rd));
|
||||
`TRACE_ARRAY1D(1, dispatch_if.rs1_data, `NUM_THREADS);
|
||||
trace_ex_op(1, ibuffer_if.ex_type, ibuffer_if.op_type, ibuffer_if.op_mod, ibuffer_if.imm);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", ibuffer_if.op_mod, ibuffer_if.tmask, ibuffer_if.wb, ibuffer_if.rd));
|
||||
`TRACE_ARRAY1D(1, gpr_stage_if.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, dispatch_if.rs2_data, `NUM_THREADS);
|
||||
`TRACE_ARRAY1D(1, gpr_stage_if.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, dispatch_if.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", dispatch_if.uuid));
|
||||
`TRACE_ARRAY1D(1, gpr_stage_if.rs3_data, `NUM_THREADS);
|
||||
`TRACE(1, (" (#%0d)\n", ibuffer_if.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -6,18 +6,22 @@ module VX_scoreboard #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_writeback_if.slave writeback_if
|
||||
VX_ibuffer_if.scoreboard ibuffer_if,
|
||||
output wire [3:0] used_regs
|
||||
);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
wire reserve_reg = scoreboard_if.valid && scoreboard_if.ready && scoreboard_if.wb;
|
||||
wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb;
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
if (reserve_reg) begin
|
||||
inuse_regs_n[scoreboard_if.wid][scoreboard_if.rd] = 1;
|
||||
inuse_regs_n[ibuffer_if.wid][ibuffer_if.rd] = 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
|
||||
|
@ -31,32 +35,30 @@ module VX_scoreboard #(
|
|||
inuse_regs <= inuse_regs_n;
|
||||
end
|
||||
end
|
||||
|
||||
reg deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
deq_inuse_rd <= inuse_regs_n[scoreboard_if.wid_n][scoreboard_if.rd_n];
|
||||
deq_inuse_rs1 <= inuse_regs_n[scoreboard_if.wid_n][scoreboard_if.rs1_n];
|
||||
deq_inuse_rs2 <= inuse_regs_n[scoreboard_if.wid_n][scoreboard_if.rs2_n];
|
||||
deq_inuse_rs3 <= inuse_regs_n[scoreboard_if.wid_n][scoreboard_if.rs3_n];
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign scoreboard_if.ready[i] = ~(inuse_regs_n[i][scoreboard_if.rd[i]]
|
||||
| inuse_regs_n[i][scoreboard_if.rs1[i]]
|
||||
| inuse_regs_n[i][scoreboard_if.rs2[i]]
|
||||
| inuse_regs_n[i][scoreboard_if.rs3[i]]);
|
||||
end
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
wire [NW_WIDTH-1:0] wid_sel;
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS),
|
||||
.REVERSE (1)
|
||||
) wid_select (
|
||||
.data_in (scoreboard_if.valid),
|
||||
.data_out (wid_sel),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
assign scoreboard_if.ready = ~(deq_inuse_rd
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
|
||||
assign scoreboard_if.used_regs[0] = deq_inuse_rd;
|
||||
assign scoreboard_if.used_regs[1] = deq_inuse_rs1;
|
||||
assign scoreboard_if.used_regs[2] = deq_inuse_rs2;
|
||||
assign scoreboard_if.used_regs[3] = deq_inuse_rs3;
|
||||
assign used_regs[0] = inuse_regs_n[wid_sel][scoreboard_if.rd[wid_sel]];
|
||||
assign used_regs[1] = inuse_regs_n[wid_sel][scoreboard_if.rs1[wid_sel]];
|
||||
assign used_regs[2] = inuse_regs_n[wid_sel][scoreboard_if.rs2[wid_sel]];
|
||||
assign used_regs[3] = inuse_regs_n[wid_sel][scoreboard_if.rs3[wid_sel]];
|
||||
|
||||
`UNUSED_VAR (writeback_if.PC)
|
||||
`UNUSED_VAR (scoreboard_if.PC)
|
||||
`UNUSED_VAR (scoreboard_if.tmask)
|
||||
`UNUSED_VAR (scoreboard_if.uuid)
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (release_reg) begin
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_dispatch_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`UP(`UUID_BITS)-1:0] uuid;
|
||||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`XLEN-1:0] PC;
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`INST_OP_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire wb;
|
||||
wire use_PC;
|
||||
wire use_imm;
|
||||
wire [`XLEN-1:0] imm;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
|
||||
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output ex_type,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output wb,
|
||||
output use_PC,
|
||||
output use_imm,
|
||||
output imm,
|
||||
output rd,
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rs3_data,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input ex_type,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input wb,
|
||||
input use_PC,
|
||||
input use_imm,
|
||||
input imm,
|
||||
input rd,
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rs3_data,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
|
@ -2,32 +2,17 @@
|
|||
|
||||
interface VX_gpr_stage_if ();
|
||||
|
||||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
|
||||
|
||||
modport master (
|
||||
output wid,
|
||||
output rs1,
|
||||
output rs2,
|
||||
output rs3,
|
||||
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rs3_data
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input wid,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3,
|
||||
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rs3_data
|
||||
|
|
|
@ -18,12 +18,6 @@ interface VX_ibuffer_if ();
|
|||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
|
||||
wire [`NR_BITS-1:0] rd_n;
|
||||
wire [`NR_BITS-1:0] rs1_n;
|
||||
wire [`NR_BITS-1:0] rs2_n;
|
||||
wire [`NR_BITS-1:0] rs3_n;
|
||||
wire [`UP(`NW_BITS)-1:0] wid_n;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
@ -44,12 +38,7 @@ interface VX_ibuffer_if ();
|
|||
output rs1,
|
||||
output rs2,
|
||||
output rs3,
|
||||
output rd_n,
|
||||
output rs1_n,
|
||||
output rs2_n,
|
||||
output rs3_n,
|
||||
output wid_n,
|
||||
input ready
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
|
@ -68,13 +57,23 @@ interface VX_ibuffer_if ();
|
|||
input rd,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3,
|
||||
input rd_n,
|
||||
input rs1_n,
|
||||
input rs2_n,
|
||||
input rs3_n,
|
||||
input wid_n,
|
||||
input rs3,
|
||||
output ready
|
||||
);
|
||||
|
||||
modport scoreboard (
|
||||
input valid,
|
||||
input wid,
|
||||
input wb,
|
||||
input rd,
|
||||
output ready
|
||||
);
|
||||
|
||||
modport gpr (
|
||||
input wid,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -2,55 +2,28 @@
|
|||
|
||||
interface VX_scoreboard_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`UP(`UUID_BITS)-1:0] uuid;
|
||||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`XLEN-1:0] PC;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NR_BITS-1:0] rd_n;
|
||||
wire [`NR_BITS-1:0] rs1_n;
|
||||
wire [`NR_BITS-1:0] rs2_n;
|
||||
wire [`NR_BITS-1:0] rs3_n;
|
||||
wire [`UP(`NW_BITS)-1:0] wid_n;
|
||||
|
||||
wire [3:0] used_regs;
|
||||
|
||||
wire ready;
|
||||
wire [`NUM_WARPS-1:0] valid;
|
||||
wire [`NUM_WARPS-1:0][`NR_BITS-1:0] rd;
|
||||
wire [`NUM_WARPS-1:0][`NR_BITS-1:0] rs1;
|
||||
wire [`NUM_WARPS-1:0][`NR_BITS-1:0] rs2;
|
||||
wire [`NUM_WARPS-1:0][`NR_BITS-1:0] rs3;
|
||||
wire [`NUM_WARPS-1:0] ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output wb,
|
||||
output rd,
|
||||
output rd_n,
|
||||
output rs1_n,
|
||||
output rs2_n,
|
||||
output rs3_n,
|
||||
output wid_n,
|
||||
input used_regs,
|
||||
output rs1,
|
||||
output rs2,
|
||||
output rs3,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input wb,
|
||||
input rd,
|
||||
input rd_n,
|
||||
input rs1_n,
|
||||
input rs2_n,
|
||||
input rs3_n,
|
||||
input wid_n,
|
||||
output used_regs,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3,
|
||||
output ready
|
||||
);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue