mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
pipeline refactoring: centralized issue buffer
This commit is contained in:
parent
1f63f9da25
commit
7c86b68977
62 changed files with 923 additions and 820 deletions
|
@ -16,12 +16,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||||
|
|
||||||
#DEBUG=1
|
DEBUG=1
|
||||||
#AFU=1
|
#AFU=1
|
||||||
|
|
||||||
CFLAGS += -fPIC
|
CFLAGS += -fPIC
|
||||||
|
|
|
@ -19,6 +19,7 @@ Simulator::Simulator() {
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
Verilated::traceEverOn(true);
|
Verilated::traceEverOn(true);
|
||||||
trace_ = new VerilatedVcdC;
|
trace_ = new VerilatedVcdC;
|
||||||
|
trace_->set_time_unit("1ns");
|
||||||
vortex_->trace(trace_, 99);
|
vortex_->trace(trace_, 99);
|
||||||
trace_->open("trace.vcd");
|
trace_->open("trace.vcd");
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1017,8 +1017,8 @@ localparam SCOPE_SR_DEPTH = 2;
|
||||||
|
|
||||||
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|
||||||
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|
||||||
|| ((| scope_dcache_req_valid) && scope_dcache_req_ready)
|
|| (scope_dcache_req_valid && scope_dcache_req_ready)
|
||||||
|| ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready)
|
|| (scope_dcache_rsp_valid && scope_dcache_rsp_ready)
|
||||||
|| (scope_dram_req_valid && scope_dram_req_ready)
|
|| (scope_dram_req_valid && scope_dram_req_ready)
|
||||||
|| (scope_dram_rsp_valid && scope_dram_rsp_ready)
|
|| (scope_dram_rsp_valid && scope_dram_rsp_ready)
|
||||||
|| (scope_snp_req_valid && scope_snp_req_ready)
|
|| (scope_snp_req_valid && scope_snp_req_ready)
|
||||||
|
|
|
@ -53,7 +53,7 @@ module VX_alu_unit #(
|
||||||
VX_priority_encoder #(
|
VX_priority_encoder #(
|
||||||
.N(`NUM_THREADS)
|
.N(`NUM_THREADS)
|
||||||
) choose_alu_result (
|
) choose_alu_result (
|
||||||
.data_in (alu_req_if.valid),
|
.data_in (alu_req_if.thread_mask),
|
||||||
.data_out (br_result_index),
|
.data_out (br_result_index),
|
||||||
`UNUSED_PIN (valid_out)
|
`UNUSED_PIN (valid_out)
|
||||||
);
|
);
|
||||||
|
@ -81,11 +81,11 @@ module VX_alu_unit #(
|
||||||
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
||||||
|
|
||||||
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
||||||
wire is_br_valid = `IS_BR_OP(alu_op) && (| alu_req_if.valid);
|
wire is_br_valid = `IS_BR_OP(alu_op) && alu_req_if.valid;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||||
|
|
||||||
wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid);
|
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + 1 + 32)
|
.N(1 + `NW_BITS + 1 + 32)
|
||||||
|
@ -99,14 +99,14 @@ module VX_alu_unit #(
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32))
|
||||||
) alu_reg (
|
) alu_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result}),
|
.in ({alu_req_if.valid, alu_req_if.issue_tag, alu_jal_result}),
|
||||||
.out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data})
|
.out ({alu_commit_if.valid, alu_commit_if.issue_tag, alu_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign alu_req_if.ready = ~stall;
|
assign alu_req_if.ready = ~stall;
|
||||||
|
|
|
@ -11,21 +11,22 @@ module VX_commit #(
|
||||||
VX_commit_if lsu_commit_if,
|
VX_commit_if lsu_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
VX_commit_if mul_commit_if,
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_commit_fp_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_commit_if gpu_commit_if,
|
VX_commit_if gpu_commit_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
|
VX_commit_is_if commit_is_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
VX_perf_cntrs_if perf_cntrs_if
|
VX_perf_cntrs_if perf_cntrs_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`NUM_EXS-1:0] commited_mask;
|
wire [`NUM_EXS-1:0] commited_mask;
|
||||||
assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready),
|
assign commited_mask = {(alu_commit_if.valid && alu_commit_if.ready),
|
||||||
((| lsu_commit_if.valid) && lsu_commit_if.ready),
|
(lsu_commit_if.valid && lsu_commit_if.ready),
|
||||||
((| csr_commit_if.valid) && csr_commit_if.ready),
|
(csr_commit_if.valid && csr_commit_if.ready),
|
||||||
((| mul_commit_if.valid) && mul_commit_if.ready),
|
(mul_commit_if.valid && mul_commit_if.ready),
|
||||||
((| fpu_commit_if.valid) && fpu_commit_if.ready),
|
(fpu_commit_if.valid && fpu_commit_if.ready),
|
||||||
((| gpu_commit_if.valid) && gpu_commit_if.ready)};
|
(gpu_commit_if.valid && gpu_commit_if.ready)};
|
||||||
|
|
||||||
wire [`NE_BITS:0] num_commits;
|
wire [`NE_BITS:0] num_commits;
|
||||||
|
|
||||||
|
@ -55,6 +56,20 @@ module VX_commit #(
|
||||||
assign perf_cntrs_if.total_cycles = total_cycles;
|
assign perf_cntrs_if.total_cycles = total_cycles;
|
||||||
assign perf_cntrs_if.total_instrs = total_instrs;
|
assign perf_cntrs_if.total_instrs = total_instrs;
|
||||||
|
|
||||||
|
assign commit_is_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
|
||||||
|
assign commit_is_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
|
||||||
|
assign commit_is_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
|
||||||
|
assign commit_is_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
|
||||||
|
assign commit_is_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||||
|
assign commit_is_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||||
|
|
||||||
|
assign commit_is_if.alu_tag = alu_commit_if.issue_tag;
|
||||||
|
assign commit_is_if.lsu_tag = lsu_commit_if.issue_tag;
|
||||||
|
assign commit_is_if.csr_tag = csr_commit_if.issue_tag;
|
||||||
|
assign commit_is_if.mul_tag = mul_commit_if.issue_tag;
|
||||||
|
assign commit_is_if.fpu_tag = fpu_commit_if.issue_tag;
|
||||||
|
assign commit_is_if.gpu_tag = gpu_commit_if.issue_tag;
|
||||||
|
|
||||||
assign gpu_commit_if.ready = 1'b1; // doesn't writeback
|
assign gpu_commit_if.ready = 1'b1; // doesn't writeback
|
||||||
|
|
||||||
VX_writeback #(
|
VX_writeback #(
|
||||||
|
@ -68,29 +83,30 @@ module VX_commit #(
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
.mul_commit_if (mul_commit_if),
|
||||||
.fpu_commit_if (fpu_commit_if),
|
.fpu_commit_if (fpu_commit_if),
|
||||||
|
.commit_is_if (commit_is_if),
|
||||||
|
|
||||||
.writeback_if (writeback_if)
|
.writeback_if (writeback_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| alu_commit_if.valid) && alu_commit_if.ready) begin
|
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.alu_data.warp_num, commit_is_if.alu_data.curr_PC, alu_commit_if.issue_tag, commit_is_if.alu_data.thread_mask, commit_is_if.alu_data.wb, commit_is_if.alu_data.rd, alu_commit_if.data);
|
||||||
end
|
end
|
||||||
if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin
|
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.lsu_data.warp_num, commit_is_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, commit_is_if.lsu_data.thread_mask, commit_is_if.lsu_data.wb, commit_is_if.lsu_data.rd, lsu_commit_if.data);
|
||||||
end
|
end
|
||||||
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin
|
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.csr_data.warp_num, commit_is_if.csr_data.curr_PC, csr_commit_if.issue_tag, commit_is_if.csr_data.thread_mask, commit_is_if.csr_data.wb, commit_is_if.csr_data.rd, csr_commit_if.data);
|
||||||
end
|
end
|
||||||
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
|
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.mul_data.warp_num, commit_is_if.mul_data.curr_PC, mul_commit_if.issue_tag, commit_is_if.mul_data.thread_mask, commit_is_if.mul_data.wb, commit_is_if.mul_data.rd, mul_commit_if.data);
|
||||||
end
|
end
|
||||||
if ((| fpu_commit_if.valid) && fpu_commit_if.ready) begin
|
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.warp_num, fpu_commit_if.curr_PC, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.fpu_data.warp_num, commit_is_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, commit_is_if.fpu_data.thread_mask, commit_is_if.fpu_data.wb, commit_is_if.fpu_data.rd, fpu_commit_if.data);
|
||||||
end
|
end
|
||||||
if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin
|
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
|
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.gpu_data.warp_num, commit_is_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, commit_is_if.gpu_data.thread_mask, commit_is_if.gpu_data.wb, commit_is_if.gpu_data.rd, gpu_commit_if.data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -39,10 +39,6 @@
|
||||||
`define SHARED_MEM_BASE_ADDR 32'h6FFFF000
|
`define SHARED_MEM_BASE_ADDR 32'h6FFFF000
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef STACK_BASE_ADDR
|
|
||||||
`define STACK_BASE_ADDR 20'h6FFFF
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef IO_BUS_BASE_ADDR
|
`ifndef IO_BUS_BASE_ADDR
|
||||||
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
|
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
|
||||||
`endif
|
`endif
|
||||||
|
@ -59,13 +55,9 @@
|
||||||
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
`define L3_ENABLE (`NUM_CLUSTERS > 1)
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef EXT_M_ENABLE
|
`define EXT_M_ENABLE
|
||||||
`define EXT_M_ENABLE 1
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef EXT_F_ENABLE
|
// define EXT_F_ENABLE
|
||||||
`define EXT_F_ENABLE 1
|
|
||||||
`endif
|
|
||||||
|
|
||||||
// Configuration Values =======================================================
|
// Configuration Values =======================================================
|
||||||
|
|
||||||
|
@ -109,6 +101,11 @@
|
||||||
`define FPURQ_SIZE 8
|
`define FPURQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
// Size of issue queue
|
||||||
|
`ifndef ISSUEQ_SIZE
|
||||||
|
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
|
||||||
|
`endif
|
||||||
|
|
||||||
// Dcache Configurable Knobs ==================================================
|
// Dcache Configurable Knobs ==================================================
|
||||||
|
|
||||||
// Size of cache in bytes
|
// Size of cache in bytes
|
||||||
|
@ -148,12 +145,12 @@
|
||||||
|
|
||||||
// Dram Fill Rsp Queue Size
|
// Dram Fill Rsp Queue Size
|
||||||
`ifndef DDFPQ_SIZE
|
`ifndef DDFPQ_SIZE
|
||||||
`define DDFPQ_SIZE 16
|
`define DDFPQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Snoop Req Queue Size
|
// Snoop Req Queue Size
|
||||||
`ifndef DSNRQ_SIZE
|
`ifndef DSNRQ_SIZE
|
||||||
`define DSNRQ_SIZE 16
|
`define DSNRQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
|
@ -173,7 +170,7 @@
|
||||||
|
|
||||||
// Prefetcher
|
// Prefetcher
|
||||||
`ifndef DPRFQ_SIZE
|
`ifndef DPRFQ_SIZE
|
||||||
`define DPRFQ_SIZE 16
|
`define DPRFQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef DPRFQ_STRIDE
|
`ifndef DPRFQ_STRIDE
|
||||||
|
@ -219,7 +216,7 @@
|
||||||
|
|
||||||
// Dram Fill Rsp Queue Size
|
// Dram Fill Rsp Queue Size
|
||||||
`ifndef IDFPQ_SIZE
|
`ifndef IDFPQ_SIZE
|
||||||
`define IDFPQ_SIZE 16
|
`define IDFPQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
|
@ -229,7 +226,7 @@
|
||||||
|
|
||||||
// Dram Writeback Queue Size
|
// Dram Writeback Queue Size
|
||||||
`ifndef IDWBQ_SIZE
|
`ifndef IDWBQ_SIZE
|
||||||
`define IDWBQ_SIZE 16
|
`define IDWBQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
|
@ -239,7 +236,7 @@
|
||||||
|
|
||||||
// Prefetcher
|
// Prefetcher
|
||||||
`ifndef IPRFQ_SIZE
|
`ifndef IPRFQ_SIZE
|
||||||
`define IPRFQ_SIZE 16
|
`define IPRFQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef IPRFQ_STRIDE
|
`ifndef IPRFQ_STRIDE
|
||||||
|
@ -312,7 +309,7 @@
|
||||||
|
|
||||||
// Core Request Queue Size
|
// Core Request Queue Size
|
||||||
`ifndef L2CREQ_SIZE
|
`ifndef L2CREQ_SIZE
|
||||||
`define L2CREQ_SIZE 16
|
`define L2CREQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Miss Reserv Queue Knob
|
// Miss Reserv Queue Knob
|
||||||
|
@ -322,12 +319,12 @@
|
||||||
|
|
||||||
// Dram Fill Rsp Queue Size
|
// Dram Fill Rsp Queue Size
|
||||||
`ifndef L2DFPQ_SIZE
|
`ifndef L2DFPQ_SIZE
|
||||||
`define L2DFPQ_SIZE 16
|
`define L2DFPQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Snoop Req Queue Size
|
// Snoop Req Queue Size
|
||||||
`ifndef L2SNRQ_SIZE
|
`ifndef L2SNRQ_SIZE
|
||||||
`define L2SNRQ_SIZE 16
|
`define L2SNRQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
|
@ -337,7 +334,7 @@
|
||||||
|
|
||||||
// Dram Writeback Queue Size
|
// Dram Writeback Queue Size
|
||||||
`ifndef L2DWBQ_SIZE
|
`ifndef L2DWBQ_SIZE
|
||||||
`define L2DWBQ_SIZE 16
|
`define L2DWBQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
|
@ -347,7 +344,7 @@
|
||||||
|
|
||||||
// Prefetcher
|
// Prefetcher
|
||||||
`ifndef L2PRFQ_SIZE
|
`ifndef L2PRFQ_SIZE
|
||||||
`define L2PRFQ_SIZE 16
|
`define L2PRFQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef L2PRFQ_STRIDE
|
`ifndef L2PRFQ_STRIDE
|
||||||
|
@ -383,7 +380,7 @@
|
||||||
|
|
||||||
// Core Request Queue Size
|
// Core Request Queue Size
|
||||||
`ifndef L3CREQ_SIZE
|
`ifndef L3CREQ_SIZE
|
||||||
`define L3CREQ_SIZE 16
|
`define L3CREQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Miss Reserv Queue Knob
|
// Miss Reserv Queue Knob
|
||||||
|
@ -393,12 +390,12 @@
|
||||||
|
|
||||||
// Dram Fill Rsp Queue Size
|
// Dram Fill Rsp Queue Size
|
||||||
`ifndef L3DFPQ_SIZE
|
`ifndef L3DFPQ_SIZE
|
||||||
`define L3DFPQ_SIZE 16
|
`define L3DFPQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Snoop Req Queue Size
|
// Snoop Req Queue Size
|
||||||
`ifndef L3SNRQ_SIZE
|
`ifndef L3SNRQ_SIZE
|
||||||
`define L3SNRQ_SIZE 16
|
`define L3SNRQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Writeback Queue Size
|
// Core Writeback Queue Size
|
||||||
|
@ -408,7 +405,7 @@
|
||||||
|
|
||||||
// Dram Writeback Queue Size
|
// Dram Writeback Queue Size
|
||||||
`ifndef L3DWBQ_SIZE
|
`ifndef L3DWBQ_SIZE
|
||||||
`define L3DWBQ_SIZE 16
|
`define L3DWBQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Dram Fill Req Queue Size
|
// Dram Fill Req Queue Size
|
||||||
|
@ -418,7 +415,7 @@
|
||||||
|
|
||||||
// Prefetcher
|
// Prefetcher
|
||||||
`ifndef L3PRFQ_SIZE
|
`ifndef L3PRFQ_SIZE
|
||||||
`define L3PRFQ_SIZE 16
|
`define L3PRFQ_SIZE 8
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef L3PRFQ_STRIDE
|
`ifndef L3PRFQ_STRIDE
|
||||||
|
|
|
@ -166,15 +166,15 @@ module VX_core #(
|
||||||
VX_cache_core_req_if #(
|
VX_cache_core_req_if #(
|
||||||
.NUM_REQUESTS(`INUM_REQUESTS),
|
.NUM_REQUESTS(`INUM_REQUESTS),
|
||||||
.WORD_SIZE(`IWORD_SIZE),
|
.WORD_SIZE(`IWORD_SIZE),
|
||||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
|
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
|
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
|
||||||
) core_icache_req_if();
|
) core_icache_req_if();
|
||||||
|
|
||||||
VX_cache_core_rsp_if #(
|
VX_cache_core_rsp_if #(
|
||||||
.NUM_REQUESTS(`INUM_REQUESTS),
|
.NUM_REQUESTS(`INUM_REQUESTS),
|
||||||
.WORD_SIZE(`IWORD_SIZE),
|
.WORD_SIZE(`IWORD_SIZE),
|
||||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
|
.CORE_TAG_WIDTH(`ICORE_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
|
.CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS)
|
||||||
) core_icache_rsp_if();
|
) core_icache_rsp_if();
|
||||||
|
|
||||||
VX_pipeline #(
|
VX_pipeline #(
|
||||||
|
|
|
@ -26,7 +26,8 @@ module VX_csr_arb (
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
// requests
|
// requests
|
||||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||||
|
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
||||||
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
||||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||||
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||||
|
@ -40,15 +41,12 @@ module VX_csr_arb (
|
||||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||||
|
|
||||||
// responses
|
// responses
|
||||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & select_io_rsp;
|
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
|
||||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||||
|
|
||||||
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~select_io_rsp}};
|
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
|
||||||
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
|
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
||||||
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
|
||||||
assign csr_commit_if.data = csr_rsp_if.data;
|
assign csr_commit_if.data = csr_rsp_if.data;
|
||||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
|
||||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
|
||||||
|
|
||||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ module VX_csr_unit #(
|
||||||
VX_csr_req_if csr_pipe_req_if();
|
VX_csr_req_if csr_pipe_req_if();
|
||||||
VX_commit_if csr_pipe_commit_if();
|
VX_commit_if csr_pipe_commit_if();
|
||||||
|
|
||||||
wire select_io_req = (| csr_io_req_if.valid);
|
wire select_io_req = csr_io_req_if.valid;
|
||||||
wire select_io_rsp;
|
wire select_io_rsp;
|
||||||
|
|
||||||
VX_csr_arb csr_arb (
|
VX_csr_arb csr_arb (
|
||||||
|
@ -44,7 +44,7 @@ module VX_csr_unit #(
|
||||||
wire [31:0] csr_updated_data_s2;
|
wire [31:0] csr_updated_data_s2;
|
||||||
wire [31:0] csr_read_data_unqual;
|
wire [31:0] csr_read_data_unqual;
|
||||||
|
|
||||||
wire is_csr_s2 = (| csr_pipe_commit_if.valid);
|
wire is_csr_s2 = csr_pipe_commit_if.valid;
|
||||||
|
|
||||||
VX_csr_data #(
|
VX_csr_data #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
|
@ -62,8 +62,10 @@ module VX_csr_unit #(
|
||||||
.warp_num (csr_pipe_req_if.warp_num)
|
.warp_num (csr_pipe_req_if.warp_num)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
wire [`NW_BITS-1:0] warp_num_s2;
|
||||||
|
|
||||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
||||||
&& (csr_pipe_commit_if.warp_num == csr_pipe_req_if.warp_num)
|
&& (warp_num_s2 == csr_pipe_req_if.warp_num)
|
||||||
&& is_csr_s2;
|
&& is_csr_s2;
|
||||||
|
|
||||||
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||||
|
@ -79,17 +81,17 @@ module VX_csr_unit #(
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
|
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
.N(1 + `ISTAG_BITS + `NW_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||||
) csr_reg (
|
) csr_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
|
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s2, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
|
@ -15,7 +15,7 @@ module VX_decode #(
|
||||||
VX_wstall_if wstall_if,
|
VX_wstall_if wstall_if,
|
||||||
VX_join_if join_if
|
VX_join_if join_if
|
||||||
);
|
);
|
||||||
wire in_valid = (| ifetch_rsp_if.valid);
|
wire in_valid = ifetch_rsp_if.valid;
|
||||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||||
|
|
||||||
reg [`ALU_BITS-1:0] alu_op;
|
reg [`ALU_BITS-1:0] alu_op;
|
||||||
|
@ -167,9 +167,8 @@ module VX_decode #(
|
||||||
end
|
end
|
||||||
|
|
||||||
// MUL
|
// MUL
|
||||||
|
`ifdef EXT_M_ENABLE
|
||||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
mul_op = `MUL_MUL;
|
mul_op = `MUL_MUL;
|
||||||
case (func3)
|
case (func3)
|
||||||
|
@ -184,9 +183,15 @@ module VX_decode #(
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
`else
|
||||||
|
wire is_mul = 0;
|
||||||
|
always @(*) begin
|
||||||
|
mul_op = `MUL_MUL;
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
// FPU
|
// FPU
|
||||||
|
`ifdef EXT_F_ENABLE
|
||||||
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
|
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
|
||||||
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
|
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
|
||||||
wire is_fci = (opcode == `INST_FCI);
|
wire is_fci = (opcode == `INST_FCI);
|
||||||
|
@ -226,6 +231,15 @@ module VX_decode #(
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
`else
|
||||||
|
wire is_fs = 0;
|
||||||
|
wire is_fci = 0;
|
||||||
|
wire is_fr4 = 0;
|
||||||
|
wire is_fpu = 0;
|
||||||
|
always @(*) begin
|
||||||
|
fpu_op = `FPU_OTHER;
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
// GPU
|
// GPU
|
||||||
|
|
||||||
|
@ -245,6 +259,7 @@ module VX_decode #(
|
||||||
|
|
||||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
||||||
|
assign decode_tmp_if.thread_mask= ifetch_rsp_if.thread_mask;
|
||||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
||||||
|
|
||||||
|
@ -299,29 +314,29 @@ module VX_decode #(
|
||||||
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||||
|
|
||||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
wire stall = ~decode_if.ready && decode_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS)
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + 1 + `FRM_BITS)
|
||||||
) decode_reg (
|
) decode_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm}),
|
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.rd_is_fp, decode_tmp_if.frm}),
|
||||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm})
|
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.rd_is_fp, decode_if.frm})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ifetch_rsp_if.ready = ~stall;
|
assign ifetch_rsp_if.ready = ~stall;
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| decode_tmp_if.valid) && ~stall) begin
|
if (decode_tmp_if.valid && ~stall) begin
|
||||||
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
||||||
print_ex_type(decode_tmp_if.ex_type);
|
print_ex_type(decode_tmp_if.ex_type);
|
||||||
$write(", op=");
|
$write(", op=");
|
||||||
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
|
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
|
||||||
$write(", wb=%b, rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
|
$write(", tmask=%b, wb=%b, rd=%0d, rd_is_fp=%b, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b, use_rs3=%b\n", decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rd_is_fp, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.rs3, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.use_rs3);
|
||||||
|
|
||||||
// trap unsupported instructions
|
// trap unsupported instructions
|
||||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));
|
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));
|
||||||
|
|
|
@ -27,12 +27,14 @@
|
||||||
/* verilator lint_off PINCONNECTEMPTY */ \
|
/* verilator lint_off PINCONNECTEMPTY */ \
|
||||||
/* verilator lint_off WIDTH */ \
|
/* verilator lint_off WIDTH */ \
|
||||||
/* verilator lint_off UNOPTFLAT */ \
|
/* verilator lint_off UNOPTFLAT */ \
|
||||||
|
/* verilator lint_off UNDRIVEN */ \
|
||||||
/* verilator lint_off DECLFILENAME */
|
/* verilator lint_off DECLFILENAME */
|
||||||
|
|
||||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||||
/* verilator lint_on WIDTH */ \
|
/* verilator lint_on WIDTH */ \
|
||||||
/* verilator lint_on UNOPTFLAT */ \
|
/* verilator lint_on UNOPTFLAT */ \
|
||||||
|
/* verilator lint_on UNDRIVEN */ \
|
||||||
/* verilator lint_on DECLFILENAME */
|
/* verilator lint_on DECLFILENAME */
|
||||||
|
|
||||||
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
|
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
|
||||||
|
@ -50,6 +52,9 @@
|
||||||
if (!(cond)) $error(msg); \
|
if (!(cond)) $error(msg); \
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
`define ENABLE_TRACING /* verilator tracing_on */
|
||||||
|
`define DISABLE_TRACING /* verilator tracing_off */
|
||||||
|
|
||||||
`define CLOG2(x) $clog2(x)
|
`define CLOG2(x) $clog2(x)
|
||||||
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
|
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
|
||||||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||||
|
@ -80,8 +85,9 @@
|
||||||
|
|
||||||
`define CSR_WIDTH 12
|
`define CSR_WIDTH 12
|
||||||
|
|
||||||
`define LATENCY_IDIV 21
|
`define ISTAG_BITS `LOG2UP(`ISSUEQ_SIZE)
|
||||||
|
|
||||||
|
`define LATENCY_IDIV 21
|
||||||
`define LATENCY_IMUL 2
|
`define LATENCY_IMUL 2
|
||||||
|
|
||||||
`define LATENCY_FMULADD 2
|
`define LATENCY_FMULADD 2
|
||||||
|
@ -259,19 +265,31 @@
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
`ifdef EXT_M_ENABLE
|
||||||
|
`define ISA_EXT_M (1 << 12)
|
||||||
|
`else
|
||||||
|
`define ISA_EXT_M 0
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifdef EXT_F_ENABLE
|
||||||
|
`define ISA_EXT_F (1 << 5)
|
||||||
|
`else
|
||||||
|
`define ISA_EXT_F 0
|
||||||
|
`endif
|
||||||
|
|
||||||
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||||
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
||||||
| (0 << 2) // C - Compressed extension \
|
| (0 << 2) // C - Compressed extension \
|
||||||
| (0 << 3) // D - Double precsision floating-point extension \
|
| (0 << 3) // D - Double precsision floating-point extension \
|
||||||
| (0 << 4) // E - RV32E base ISA \
|
| (0 << 4) // E - RV32E base ISA \
|
||||||
| (`EXT_F_ENABLE << 5) // F - Single precsision floating-point extension \
|
|`ISA_EXT_F // F - Single precsision floating-point extension \
|
||||||
| (0 << 6) // G - Additional standard extensions present \
|
| (0 << 6) // G - Additional standard extensions present \
|
||||||
| (0 << 7) // H - Hypervisor mode implemented \
|
| (0 << 7) // H - Hypervisor mode implemented \
|
||||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||||
| (0 << 9) // J - Reserved \
|
| (0 << 9) // J - Reserved \
|
||||||
| (0 << 10) // K - Reserved \
|
| (0 << 10) // K - Reserved \
|
||||||
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|
||||||
| (`EXT_M_ENABLE << 12) // M - Integer Multiply/Divide extension \
|
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
|
||||||
| (0 << 13) // N - User level interrupts supported \
|
| (0 << 13) // N - User level interrupts supported \
|
||||||
| (0 << 14) // O - Reserved \
|
| (0 << 14) // O - Reserved \
|
||||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||||
|
@ -300,7 +318,7 @@
|
||||||
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
|
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
|
||||||
|
|
||||||
// TAG sharing enable
|
// TAG sharing enable
|
||||||
`define DCORE_TAG_ID_BITS `LOG2UP(`DCREQ_SIZE)
|
`define DCORE_TAG_ID_BITS `ISTAG_BITS
|
||||||
|
|
||||||
// Core request tag bits
|
// Core request tag bits
|
||||||
`define DCORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)
|
`define DCORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)
|
||||||
|
@ -335,7 +353,7 @@
|
||||||
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||||
|
|
||||||
// TAG sharing enable
|
// TAG sharing enable
|
||||||
`define ICORE_TAG_ID_BITS `LOG2UP(`ICREQ_SIZE)
|
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||||
|
|
||||||
// Core request tag bits
|
// Core request tag bits
|
||||||
`define ICORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)
|
`define ICORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)
|
||||||
|
@ -438,8 +456,6 @@
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
task print_ex_type;
|
task print_ex_type;
|
||||||
input [`EX_BITS-1:0] ex;
|
input [`EX_BITS-1:0] ex;
|
||||||
begin
|
begin
|
||||||
|
@ -489,19 +505,6 @@ task print_instr_op;
|
||||||
default: $write("?");
|
default: $write("?");
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
`EX_MUL: begin
|
|
||||||
case (`MUL_BITS'(op))
|
|
||||||
`MUL_MUL: $write("MUL");
|
|
||||||
`MUL_MULH: $write("MULH");
|
|
||||||
`MUL_MULHSU:$write("MULHSU");
|
|
||||||
`MUL_MULHU: $write("MULHU");
|
|
||||||
`MUL_DIV: $write("DIV");
|
|
||||||
`MUL_DIVU: $write("DIVU");
|
|
||||||
`MUL_REM: $write("REM");
|
|
||||||
`MUL_REMU: $write("REMU");
|
|
||||||
default: $write("?");
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
`EX_LSU: begin
|
`EX_LSU: begin
|
||||||
case (`LSU_BITS'(op))
|
case (`LSU_BITS'(op))
|
||||||
`LSU_LB: $write("LB");
|
`LSU_LB: $write("LB");
|
||||||
|
@ -525,6 +528,45 @@ task print_instr_op;
|
||||||
default: $write("?");
|
default: $write("?");
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
`EX_MUL: begin
|
||||||
|
case (`MUL_BITS'(op))
|
||||||
|
`MUL_MUL: $write("MUL");
|
||||||
|
`MUL_MULH: $write("MULH");
|
||||||
|
`MUL_MULHSU:$write("MULHSU");
|
||||||
|
`MUL_MULHU: $write("MULHU");
|
||||||
|
`MUL_DIV: $write("DIV");
|
||||||
|
`MUL_DIVU: $write("DIVU");
|
||||||
|
`MUL_REM: $write("REM");
|
||||||
|
`MUL_REMU: $write("REMU");
|
||||||
|
default: $write("?");
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
`EX_FPU: begin
|
||||||
|
case (`FPU_BITS'(op))
|
||||||
|
`FPU_ADD: $write("ADD");
|
||||||
|
`FPU_SUB: $write("SUB");
|
||||||
|
`FPU_MUL: $write("MUL");
|
||||||
|
`FPU_DIV: $write("DIV");
|
||||||
|
`FPU_SQRT: $write("SQRT");
|
||||||
|
`FPU_MADD: $write("MADD");
|
||||||
|
`FPU_NMSUB: $write("NMSUB");
|
||||||
|
`FPU_NMADD: $write("NMADD");
|
||||||
|
`FPU_SGNJ: $write("SGNJ");
|
||||||
|
`FPU_SGNJN: $write("SGNJN");
|
||||||
|
`FPU_SGNJX: $write("SGNJX");
|
||||||
|
`FPU_MIN: $write("MIN");
|
||||||
|
`FPU_MAX: $write("MAX");
|
||||||
|
`FPU_CVTWS: $write("CVTWS");
|
||||||
|
`FPU_CVTWUS:$write("CVTWUS");
|
||||||
|
`FPU_CVTSW: $write("CVTSW");
|
||||||
|
`FPU_CVTSWU:$write("CVTSWU");
|
||||||
|
`FPU_MVXW: $write("MVXW");
|
||||||
|
`FPU_MVWX: $write("MVWX");
|
||||||
|
`FPU_CLASS: $write("CLASS");
|
||||||
|
`FPU_CMP: $write("CMP");
|
||||||
|
default: $write("?");
|
||||||
|
endcase
|
||||||
|
end
|
||||||
`EX_GPU: begin
|
`EX_GPU: begin
|
||||||
case (`GPU_BITS'(op))
|
case (`GPU_BITS'(op))
|
||||||
`GPU_TMC: $write("TMC");
|
`GPU_TMC: $write("TMC");
|
||||||
|
|
|
@ -35,7 +35,7 @@ module VX_execute #(
|
||||||
VX_commit_if lsu_commit_if,
|
VX_commit_if lsu_commit_if,
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
VX_commit_if mul_commit_if,
|
||||||
VX_commit_fp_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_commit_if gpu_commit_if,
|
VX_commit_if gpu_commit_if,
|
||||||
|
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
|
@ -79,15 +79,21 @@ module VX_execute #(
|
||||||
.csr_commit_if (csr_commit_if)
|
.csr_commit_if (csr_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
`ifdef EXT_M_ENABLE
|
||||||
VX_mul_unit #(
|
VX_mul_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) mul_unit (
|
) mul_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.mul_req_if (mul_req_if),
|
.alu_req_if (mul_req_if),
|
||||||
.mul_commit_if (mul_commit_if)
|
.alu_commit_if (mul_commit_if)
|
||||||
);
|
);
|
||||||
|
`else
|
||||||
|
assign mul_req_if.ready = 0;
|
||||||
|
assign mul_commit_if.valid = 0;
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifdef EXT_F_ENABLE
|
||||||
VX_fpu_unit #(
|
VX_fpu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) fpu_unit (
|
) fpu_unit (
|
||||||
|
@ -98,6 +104,11 @@ module VX_execute #(
|
||||||
.fpu_to_csr_if (fpu_to_csr_if),
|
.fpu_to_csr_if (fpu_to_csr_if),
|
||||||
.fpu_commit_if (fpu_commit_if)
|
.fpu_commit_if (fpu_commit_if)
|
||||||
);
|
);
|
||||||
|
`else
|
||||||
|
assign fpu_req_if.ready = 0;
|
||||||
|
assign fpu_commit_if.valid = 0;
|
||||||
|
assign fpu_to_csr_if.valid = 0;
|
||||||
|
`endif
|
||||||
|
|
||||||
VX_gpu_unit #(
|
VX_gpu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
|
@ -107,7 +118,7 @@ module VX_execute #(
|
||||||
.gpu_commit_if (gpu_commit_if)
|
.gpu_commit_if (gpu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ebreak = (| alu_req_if.valid) && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
assign ebreak = alu_req_if.valid && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
||||||
|
|
||||||
`SCOPE_ASSIGN(scope_decode_valid, decode_if.valid);
|
`SCOPE_ASSIGN(scope_decode_valid, decode_if.valid);
|
||||||
`SCOPE_ASSIGN(scope_decode_warp_num, decode_if.warp_num);
|
`SCOPE_ASSIGN(scope_decode_warp_num, decode_if.warp_num);
|
||||||
|
|
|
@ -14,7 +14,7 @@ module VX_fpu_unit #(
|
||||||
VX_fpu_from_csr_if fpu_from_csr_if,
|
VX_fpu_from_csr_if fpu_from_csr_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_commit_fp_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_fpu_to_csr_if fpu_to_csr_if
|
VX_fpu_to_csr_if fpu_to_csr_if
|
||||||
);
|
);
|
||||||
localparam FOP_BITS = fpnew_pkg::OP_BITS;
|
localparam FOP_BITS = fpnew_pkg::OP_BITS;
|
||||||
|
@ -98,6 +98,8 @@ module VX_fpu_unit #(
|
||||||
|
|
||||||
assign fpu_operands = {fpu_req_if.rs3_data, fpu_req_if.rs2_data, fpu_req_if.rs1_data};
|
assign fpu_operands = {fpu_req_if.rs3_data, fpu_req_if.rs2_data, fpu_req_if.rs1_data};
|
||||||
|
|
||||||
|
`DISABLE_TRACING
|
||||||
|
|
||||||
fpnew_top #(
|
fpnew_top #(
|
||||||
.Features (FPU_FEATURES),
|
.Features (FPU_FEATURES),
|
||||||
.Implementation (FPU_IMPLEMENTATION),
|
.Implementation (FPU_IMPLEMENTATION),
|
||||||
|
@ -125,47 +127,28 @@ module VX_fpu_unit #(
|
||||||
`UNUSED_PIN (busy_o)
|
`UNUSED_PIN (busy_o)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire req_push = fpu_req_if.valid && fpu_req_if.ready;
|
`ENABLE_TRACING
|
||||||
wire req_pop = fpu_out_valid && fpu_out_ready;
|
|
||||||
wire req_full;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] rsp_valid;
|
reg [`NW_BITS-1:0] rsp_warp_num_buf [`ISSUEQ_SIZE];
|
||||||
wire [`NW_BITS-1:0] rsp_warp_num;
|
|
||||||
wire [31:0] rsp_curr_PC;
|
|
||||||
wire rsp_wb;
|
|
||||||
wire [`NR_BITS-1:0] rsp_rd;
|
|
||||||
wire rsp_rd_is_fp;
|
|
||||||
|
|
||||||
VX_index_queue #(
|
assign fpu_in_valid = fpu_req_if.valid;
|
||||||
.DATAW (`NUM_THREADS + `NW_BITS + 32 + 1 + `NR_BITS + 1),
|
assign fpu_in_tag = fpu_req_if.issue_tag;
|
||||||
.SIZE (`FPURQ_SIZE)
|
|
||||||
) fpu_req_queue (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.write_data ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp}),
|
|
||||||
.write_addr (fpu_in_tag),
|
|
||||||
.push (req_push),
|
|
||||||
.full (req_full),
|
|
||||||
.pop (req_pop),
|
|
||||||
.read_addr (fpu_out_tag),
|
|
||||||
.read_data ({rsp_valid, rsp_warp_num, rsp_curr_PC, rsp_wb, rsp_rd, rsp_rd_is_fp}),
|
|
||||||
`UNUSED_PIN (empty)
|
|
||||||
);
|
|
||||||
|
|
||||||
assign fpu_in_valid = (| fpu_req_if.valid) && ~req_full;
|
always @(posedge clk) begin
|
||||||
assign fpu_req_if.ready = fpu_in_ready && ~req_full;
|
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||||
|
rsp_warp_num_buf[fpu_in_tag] <= fpu_req_if.warp_num;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
assign fpu_commit_if.valid = rsp_valid & {`NUM_THREADS{fpu_out_valid}};
|
assign fpu_req_if.ready = fpu_in_ready;
|
||||||
assign fpu_commit_if.warp_num = rsp_warp_num;
|
|
||||||
assign fpu_commit_if.curr_PC = rsp_curr_PC;
|
assign fpu_commit_if.valid = fpu_out_valid;
|
||||||
|
assign fpu_commit_if.issue_tag = fpu_out_tag;
|
||||||
assign fpu_commit_if.data = fpu_result;
|
assign fpu_commit_if.data = fpu_result;
|
||||||
assign fpu_commit_if.wb = rsp_wb;
|
|
||||||
assign fpu_commit_if.rd = rsp_rd;
|
|
||||||
assign fpu_commit_if.rd_is_fp = rsp_rd_is_fp;
|
|
||||||
assign fpu_out_ready = fpu_commit_if.ready;
|
assign fpu_out_ready = fpu_commit_if.ready;
|
||||||
|
|
||||||
assign fpu_to_csr_if.valid = fpu_out_valid;
|
assign fpu_to_csr_if.valid = fpu_out_valid && fpu_req_if.ready;
|
||||||
assign fpu_to_csr_if.warp_num = rsp_warp_num;
|
assign fpu_to_csr_if.warp_num = rsp_warp_num_buf[fpu_out_tag];
|
||||||
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
|
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
|
||||||
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
|
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
|
||||||
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
|
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
|
||||||
|
|
|
@ -50,7 +50,7 @@ module VX_gpr_fp_ctrl (
|
||||||
if (decode_if.rs1_is_fp) begin
|
if (decode_if.rs1_is_fp) begin
|
||||||
tmp_rs1_data <= rs1_fp_data;
|
tmp_rs1_data <= rs1_fp_data;
|
||||||
end else begin
|
end else begin
|
||||||
tmp_rs1_data <= decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : rs1_int_data;
|
tmp_rs1_data <= rs1_int_data;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -63,7 +63,7 @@ module VX_gpr_fp_ctrl (
|
||||||
if (decode_if.rs2_is_fp) begin
|
if (decode_if.rs2_is_fp) begin
|
||||||
tmp_rs2_data <= rs2_fp_data;
|
tmp_rs2_data <= rs2_fp_data;
|
||||||
end else begin
|
end else begin
|
||||||
tmp_rs2_data <= decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : rs2_int_data;
|
tmp_rs2_data <= rs2_int_data;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -35,8 +35,6 @@ module VX_gpr_ram (
|
||||||
ram[waddr][i][3] <= wdata[i][31:24];
|
ram[waddr][i][3] <= wdata[i][31:24];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assert(~(|we) || (waddr != 0)); // ensure r0 is never written!
|
|
||||||
assert(0 == ram[0]);
|
|
||||||
end
|
end
|
||||||
|
|
||||||
assign rs1_data = ram[rs1];
|
assign rs1_data = ram[rs1];
|
||||||
|
|
|
@ -16,11 +16,10 @@ module VX_gpr_stage #(
|
||||||
input wire schedule_delay,
|
input wire schedule_delay,
|
||||||
output wire gpr_delay
|
output wire gpr_delay
|
||||||
);
|
);
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
|
wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
|
wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_fp_data [`NUM_WARPS-1:0];
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
|
|
||||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
wire [`NR_BITS-1:0] raddr1;
|
wire [`NR_BITS-1:0] raddr1;
|
||||||
|
@ -29,12 +28,10 @@ module VX_gpr_stage #(
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
assign we[i] = writeback_if.thread_mask & {`NUM_THREADS{~writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||||
|
|
||||||
// Int GPRs
|
|
||||||
VX_gpr_ram gpr_int_ram (
|
VX_gpr_ram gpr_int_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.we (we[i] & {`NUM_THREADS{~writeback_if.rd_is_fp}}),
|
.we (we[i]),
|
||||||
.waddr (writeback_if.rd),
|
.waddr (writeback_if.rd),
|
||||||
.wdata (writeback_if.data),
|
.wdata (writeback_if.data),
|
||||||
.rs1 (raddr1),
|
.rs1 (raddr1),
|
||||||
|
@ -42,11 +39,18 @@ module VX_gpr_stage #(
|
||||||
.rs1_data (rs1_int_data[i]),
|
.rs1_data (rs1_int_data[i]),
|
||||||
.rs2_data (rs2_int_data[i])
|
.rs2_data (rs2_int_data[i])
|
||||||
);
|
);
|
||||||
|
end
|
||||||
|
|
||||||
// FP GPRs
|
`ifdef EXT_F_ENABLE
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rs1_fp_data [`NUM_WARPS-1:0];
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
|
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||||
|
assign we[i] = writeback_if.thread_mask & {`NUM_THREADS{writeback_if.rd_is_fp && (i == writeback_if.warp_num)}};
|
||||||
VX_gpr_ram gpr_fp_ram (
|
VX_gpr_ram gpr_fp_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.we (we[i] & {`NUM_THREADS{writeback_if.rd_is_fp}}),
|
.we (we[i]),
|
||||||
.waddr (writeback_if.rd),
|
.waddr (writeback_if.rd),
|
||||||
.wdata (writeback_if.data),
|
.wdata (writeback_if.data),
|
||||||
.rs1 (raddr1),
|
.rs1 (raddr1),
|
||||||
|
@ -54,18 +58,18 @@ module VX_gpr_stage #(
|
||||||
.rs1_data (rs1_fp_data[i]),
|
.rs1_data (rs1_fp_data[i]),
|
||||||
.rs2_data (rs2_fp_data[i])
|
.rs2_data (rs2_fp_data[i])
|
||||||
);
|
);
|
||||||
|
end
|
||||||
|
|
||||||
// controller for multi-cycle read
|
|
||||||
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
//inputs
|
//inputs
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.rs1_int_data (rs1_int_data[i]),
|
.rs1_int_data (rs1_int_data[decode_if.warp_num]),
|
||||||
.rs2_int_data (rs2_int_data[i]),
|
.rs2_int_data (rs2_int_data[decode_if.warp_num]),
|
||||||
.rs1_fp_data (rs1_fp_data[i]),
|
.rs1_fp_data (rs1_fp_data[decode_if.warp_num]),
|
||||||
.rs2_fp_data (rs2_fp_data[i]),
|
.rs2_fp_data (rs2_fp_data[decode_if.warp_num]),
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
.raddr1 (raddr1),
|
.raddr1 (raddr1),
|
||||||
|
@ -74,7 +78,16 @@ module VX_gpr_stage #(
|
||||||
.schedule_delay (schedule_delay),
|
.schedule_delay (schedule_delay),
|
||||||
.gpr_delay (gpr_delay)
|
.gpr_delay (gpr_delay)
|
||||||
);
|
);
|
||||||
end
|
|
||||||
|
`else
|
||||||
|
assign raddr1 = decode_if.rs1;
|
||||||
|
assign raddr2 = decode_if.rs2;
|
||||||
|
assign gpr_data_if.rs1_data = rs1_int_data[decode_if.warp_num];
|
||||||
|
assign gpr_data_if.rs2_data = rs2_int_data[decode_if.warp_num];
|
||||||
|
assign gpr_data_if.rs3_data = 0;
|
||||||
|
assign gpr_delay = 0;
|
||||||
|
`UNUSED_VAR (schedule_delay)
|
||||||
|
`endif
|
||||||
|
|
||||||
assign writeback_if.ready = 1'b1;
|
assign writeback_if.ready = 1'b1;
|
||||||
|
|
||||||
|
|
|
@ -10,52 +10,53 @@ module VX_gpu_unit #(
|
||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_commit_if gpu_commit_if
|
VX_commit_if gpu_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0] curr_valids = gpu_req_if.valid;
|
|
||||||
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
||||||
wire is_tmc = (gpu_req_if.gpu_op == `GPU_TMC);
|
wire is_tmc = (gpu_req_if.gpu_op == `GPU_TMC);
|
||||||
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
||||||
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
wire gpu_req_fire = gpu_req_if.valid && gpu_commit_if.ready;
|
||||||
|
|
||||||
genvar i;
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
|
|
||||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
|
||||||
end
|
|
||||||
|
|
||||||
wire valid_inst = (| curr_valids);
|
|
||||||
|
|
||||||
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
||||||
|
|
||||||
assign warp_ctl_if.change_mask = is_tmc && valid_inst;
|
// tmc
|
||||||
assign warp_ctl_if.thread_mask = is_tmc ? tmc_new_mask : 0;
|
|
||||||
|
|
||||||
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (0 == warp_ctl_if.thread_mask);
|
genvar i;
|
||||||
|
|
||||||
wire wspawn = is_wspawn && valid_inst;
|
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||||
|
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
|
|
||||||
assign wspawn_new_active[i] = (i < gpu_req_if.rs1_data[0]);
|
|
||||||
end
|
end
|
||||||
|
assign warp_ctl_if.change_mask = is_tmc && gpu_req_fire;
|
||||||
|
assign warp_ctl_if.thread_mask = tmc_new_mask;
|
||||||
|
|
||||||
assign warp_ctl_if.is_barrier = is_bar && valid_inst;
|
// barrier
|
||||||
|
|
||||||
|
assign warp_ctl_if.is_barrier = is_bar && gpu_req_fire;
|
||||||
assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||||
|
assign warp_ctl_if.barrier_num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||||
|
|
||||||
assign warp_ctl_if.num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
// wspawn
|
||||||
|
|
||||||
assign warp_ctl_if.wspawn = wspawn;
|
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||||
|
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||||
|
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||||
|
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||||
|
end
|
||||||
|
assign warp_ctl_if.wspawn = is_wspawn && gpu_req_fire;
|
||||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
||||||
assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
|
assign warp_ctl_if.wspawn_wmask = wspawn_wmask;
|
||||||
|
|
||||||
|
// split
|
||||||
|
|
||||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
|
for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
|
||||||
wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
|
wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
|
||||||
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
|
assign split_new_use_mask[i] = gpu_req_if.thread_mask[i] & (curr_bool);
|
||||||
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
|
assign split_new_later_mask[i] = gpu_req_if.thread_mask[i] & (!curr_bool);
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`NT_BITS:0] num_valids;
|
wire [`NT_BITS:0] num_valids;
|
||||||
|
@ -63,24 +64,20 @@ module VX_gpu_unit #(
|
||||||
VX_countones #(
|
VX_countones #(
|
||||||
.N(`NUM_THREADS)
|
.N(`NUM_THREADS)
|
||||||
) valids_counter (
|
) valids_counter (
|
||||||
.valids(curr_valids),
|
.valids(gpu_req_if.thread_mask),
|
||||||
.count (num_valids)
|
.count (num_valids)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1);
|
assign warp_ctl_if.is_split = is_split && (num_valids > 1) && gpu_req_fire;
|
||||||
assign warp_ctl_if.do_split = (split_new_use_mask != 0) && (split_new_use_mask != {`NUM_THREADS{1'b1}});
|
assign warp_ctl_if.do_split = (split_new_use_mask != 0) && (split_new_use_mask != {`NUM_THREADS{1'b1}});
|
||||||
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
||||||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
||||||
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
||||||
|
|
||||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
|
||||||
|
|
||||||
// commit
|
// commit
|
||||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||||
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
|
assign gpu_commit_if.issue_tag = gpu_req_if.issue_tag;
|
||||||
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
|
|
||||||
assign gpu_commit_if.wb = 0;
|
|
||||||
assign gpu_commit_if.rd = 0;
|
|
||||||
assign gpu_commit_if.data = 0;
|
assign gpu_commit_if.data = 0;
|
||||||
|
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
|
@ -18,61 +18,46 @@ module VX_icache_stage #(
|
||||||
// reponse
|
// reponse
|
||||||
VX_ifetch_rsp_if ifetch_rsp_if
|
VX_ifetch_rsp_if ifetch_rsp_if
|
||||||
);
|
);
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
|
reg [31:0] rsp_curr_PC_buf [`NUM_WARPS-1:0];
|
||||||
|
reg [`NUM_THREADS-1:0] rsp_thread_mask_buf [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
wire valid_inst = (| ifetch_req_if.valid);
|
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||||
|
|
||||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
|
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.warp_num;
|
||||||
wire mrq_full;
|
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||||
|
|
||||||
wire mrq_push = icache_req_if.valid && icache_req_if.ready;
|
|
||||||
wire mrq_pop = icache_rsp_if.valid && icache_rsp_if.ready;
|
|
||||||
|
|
||||||
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
|
|
||||||
|
|
||||||
VX_index_queue #(
|
|
||||||
.DATAW (32 + `NW_BITS),
|
|
||||||
.SIZE (`ICREQ_SIZE)
|
|
||||||
) mem_req_queue (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.write_data ({ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
|
|
||||||
.write_addr (mrq_write_addr),
|
|
||||||
.push (mrq_push),
|
|
||||||
.full (mrq_full),
|
|
||||||
.pop (mrq_pop),
|
|
||||||
.read_addr (mrq_read_addr),
|
|
||||||
.read_data ({ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
|
|
||||||
`UNUSED_PIN (empty)
|
|
||||||
);
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mrq_push) begin
|
if (icache_req_fire) begin
|
||||||
valid_threads[ifetch_req_if.warp_num] <= ifetch_req_if.valid;
|
rsp_curr_PC_buf[req_tag] <= ifetch_req_if.curr_PC;
|
||||||
|
rsp_thread_mask_buf[req_tag] <= ifetch_req_if.thread_mask;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// Icache Request
|
// Icache Request
|
||||||
assign icache_req_if.valid = valid_inst && !mrq_full;
|
assign icache_req_if.valid = ifetch_req_if.valid;
|
||||||
assign icache_req_if.rw = 0;
|
assign icache_req_if.rw = 0;
|
||||||
assign icache_req_if.byteen = 4'b1111;
|
assign icache_req_if.byteen = 4'b1111;
|
||||||
assign icache_req_if.addr = ifetch_req_if.curr_PC[31:2];
|
assign icache_req_if.addr = ifetch_req_if.curr_PC[31:2];
|
||||||
assign icache_req_if.data = 0;
|
assign icache_req_if.data = 0;
|
||||||
|
|
||||||
// Can't accept new request
|
// Can accept new request?
|
||||||
assign ifetch_req_if.ready = !mrq_full && icache_req_if.ready;
|
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
|
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, req_tag};
|
||||||
`else
|
`else
|
||||||
assign icache_req_if.tag = mrq_write_addr;
|
assign icache_req_if.tag = req_tag;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid ? valid_threads[ifetch_rsp_if.warp_num] : 0;
|
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
|
||||||
|
assign ifetch_rsp_if.warp_num = rsp_tag;
|
||||||
|
assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag];
|
||||||
|
assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag];
|
||||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||||
|
|
||||||
// Can't accept new response
|
// Can accept new response?
|
||||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||||
|
|
||||||
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
|
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
|
||||||
|
@ -89,10 +74,10 @@ module VX_icache_stage #(
|
||||||
`ifdef DBG_PRINT_CORE_ICACHE
|
`ifdef DBG_PRINT_CORE_ICACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||||
$display("%t: I$%0d req: tag=%0h, PC=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num);
|
$display("%t: I$%0d req: warp=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.warp_num, ifetch_req_if.curr_PC);
|
||||||
end
|
end
|
||||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||||
$display("%t: I$%0d rsp: tag=%0h, PC=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num, ifetch_rsp_if.instr);
|
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_req_if.curr_PC, ifetch_rsp_if.instr);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -8,6 +8,7 @@ module VX_issue #(
|
||||||
|
|
||||||
VX_decode_if decode_if,
|
VX_decode_if decode_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
|
VX_commit_is_if commit_is_if,
|
||||||
|
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
|
@ -19,6 +20,7 @@ module VX_issue #(
|
||||||
VX_gpr_data_if gpr_data_if();
|
VX_gpr_data_if gpr_data_if();
|
||||||
wire schedule_delay;
|
wire schedule_delay;
|
||||||
wire gpr_delay;
|
wire gpr_delay;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||||
|
|
||||||
wire alu_busy = ~alu_req_if.ready;
|
wire alu_busy = ~alu_req_if.ready;
|
||||||
wire lsu_busy = ~lsu_req_if.ready;
|
wire lsu_busy = ~lsu_req_if.ready;
|
||||||
|
@ -34,6 +36,7 @@ module VX_issue #(
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
|
.commit_is_if (commit_is_if),
|
||||||
.gpr_busy (gpr_delay),
|
.gpr_busy (gpr_delay),
|
||||||
.alu_busy (alu_busy),
|
.alu_busy (alu_busy),
|
||||||
.lsu_busy (lsu_busy),
|
.lsu_busy (lsu_busy),
|
||||||
|
@ -41,6 +44,7 @@ module VX_issue #(
|
||||||
.mul_busy (mul_busy),
|
.mul_busy (mul_busy),
|
||||||
.fpu_busy (fpu_busy),
|
.fpu_busy (fpu_busy),
|
||||||
.gpu_busy (gpu_busy),
|
.gpu_busy (gpu_busy),
|
||||||
|
.issue_tag (issue_tag),
|
||||||
.schedule_delay (schedule_delay),
|
.schedule_delay (schedule_delay),
|
||||||
`UNUSED_PIN (is_empty)
|
`UNUSED_PIN (is_empty)
|
||||||
);
|
);
|
||||||
|
@ -57,123 +61,54 @@ module VX_issue #(
|
||||||
.gpr_delay (gpr_delay)
|
.gpr_delay (gpr_delay)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_alu_req_if alu_req_tmp_if();
|
VX_decode_if decode_tmp_if();
|
||||||
VX_lsu_req_if lsu_req_tmp_if();
|
VX_gpr_data_if gpr_data_tmp_if();
|
||||||
VX_csr_req_if csr_req_tmp_if();
|
|
||||||
VX_mul_req_if mul_req_tmp_if();
|
wire stall = ~alu_req_if.ready || schedule_delay;
|
||||||
VX_fpu_req_if fpu_req_tmp_if();
|
wire flush = alu_req_if.ready && schedule_delay;
|
||||||
VX_gpu_req_if gpu_req_tmp_if();
|
|
||||||
|
VX_generic_register #(
|
||||||
|
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||||
|
) decode_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.stall (stall),
|
||||||
|
.flush (flush),
|
||||||
|
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_data_if.rs1_data, gpr_data_if.rs2_data, gpr_data_if.rs3_data}),
|
||||||
|
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
|
||||||
|
);
|
||||||
|
|
||||||
VX_issue_demux issue_demux (
|
VX_issue_demux issue_demux (
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_tmp_if),
|
||||||
.gpr_data_if (gpr_data_if),
|
.gpr_data_if (gpr_data_tmp_if),
|
||||||
.alu_req_if (alu_req_tmp_if),
|
.issue_tag (issue_tmp_tag),
|
||||||
.lsu_req_if (lsu_req_tmp_if),
|
.alu_req_if (alu_req_if),
|
||||||
.csr_req_if (csr_req_tmp_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.mul_req_if (mul_req_tmp_if),
|
.csr_req_if (csr_req_if),
|
||||||
.fpu_req_if (fpu_req_tmp_if),
|
.mul_req_if (mul_req_if),
|
||||||
.gpu_req_if (gpu_req_tmp_if)
|
.fpu_req_if (fpu_req_if),
|
||||||
);
|
.gpu_req_if (gpu_req_if)
|
||||||
|
|
||||||
wire stall_alu = ~alu_req_if.ready || schedule_delay;
|
|
||||||
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
|
|
||||||
wire stall_csr = ~csr_req_if.ready || schedule_delay;
|
|
||||||
wire stall_mul = ~mul_req_if.ready || schedule_delay;
|
|
||||||
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
|
|
||||||
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
|
|
||||||
|
|
||||||
wire flush_alu = alu_req_if.ready && schedule_delay;
|
|
||||||
wire flush_lsu = lsu_req_if.ready && schedule_delay;
|
|
||||||
wire flush_csr = csr_req_if.ready && schedule_delay;
|
|
||||||
wire flush_mul = mul_req_if.ready && schedule_delay;
|
|
||||||
wire flush_fpu = fpu_req_if.ready && schedule_delay;
|
|
||||||
wire flush_gpu = gpu_req_if.ready && schedule_delay;
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
|
||||||
) alu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_alu),
|
|
||||||
.flush (flush_alu),
|
|
||||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
|
||||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.wb, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
|
||||||
) lsu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_lsu),
|
|
||||||
.flush (flush_lsu),
|
|
||||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data}),
|
|
||||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.wb, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
|
||||||
) csr_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_csr),
|
|
||||||
.flush (flush_csr),
|
|
||||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
|
||||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
|
||||||
) mul_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_mul),
|
|
||||||
.flush (flush_mul),
|
|
||||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
|
||||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
|
|
||||||
) fpu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_fpu),
|
|
||||||
.flush (flush_fpu),
|
|
||||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rd_is_fp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
|
|
||||||
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
|
||||||
) gpu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall_gpu),
|
|
||||||
.flush (flush_gpu),
|
|
||||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
|
||||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| alu_req_tmp_if.valid) && ~stall_alu) begin
|
if (alu_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||||
end
|
end
|
||||||
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
|
if (lsu_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
|
// LSU issue trace: each argument is listed in the same order as its format field (rw, wb, rd, ...)
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, lsu_req_if.rw, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
|
||||||
end
|
end
|
||||||
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
|
if (csr_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||||
end
|
end
|
||||||
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
|
if (mul_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
if ((| csr_req_tmp_if.valid) && ~stall_csr) begin
|
if (fpu_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, op=%0d, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%d, rd=%0d, frm=%0h, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||||
end
|
end
|
||||||
if ((| gpu_req_tmp_if.valid) && ~stall_gpu) begin
|
if (gpu_req_if.valid && ~stall) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, op=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data);
|
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -19,18 +19,19 @@ module VX_lsu_unit #(
|
||||||
VX_commit_if lsu_commit_if
|
VX_commit_if lsu_commit_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] use_valid;
|
wire use_valid;
|
||||||
|
wire [`NUM_THREADS-1:0] use_thread_mask;
|
||||||
wire use_req_rw;
|
wire use_req_rw;
|
||||||
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
|
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
|
||||||
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
||||||
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
||||||
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
||||||
wire [`BYTEEN_BITS-1:0] mem_byteen;
|
wire [`BYTEEN_BITS-1:0] use_req_fullbyteen;
|
||||||
wire [`NR_BITS-1:0] use_rd;
|
wire [`NR_BITS-1:0] use_rd;
|
||||||
wire [`NW_BITS-1:0] use_warp_num;
|
wire [`NW_BITS-1:0] use_warp_num;
|
||||||
|
wire [`ISTAG_BITS-1:0] use_issue_tag;
|
||||||
wire use_wb;
|
wire use_wb;
|
||||||
wire [31:0] use_pc;
|
wire [31:0] use_pc;
|
||||||
wire mrq_full;
|
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
||||||
|
@ -60,126 +61,138 @@ module VX_lsu_unit #(
|
||||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||||
end
|
end
|
||||||
|
|
||||||
// Can accept new request
|
wire store_stalled;
|
||||||
wire stall = ~dcache_req_if.ready || mrq_full;
|
wire stall_in = store_stalled || ~dcache_req_if.ready;
|
||||||
assign lsu_req_if.ready = ~stall;
|
|
||||||
|
// Can accept new request?
|
||||||
|
assign lsu_req_if.ready = ~stall_in;
|
||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
wire [`NUM_THREADS-1:0][31:0] use_address;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + 1 + 32)
|
.N(1 + `NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32)
|
||||||
) mem_req_reg (
|
) lsu_req_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall_in),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({lsu_req_if.valid, full_address, lsu_req_if.byteen, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
.in ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, lsu_req_if.byteen, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||||
.out ({use_valid , use_address, mem_byteen , use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd , use_warp_num , use_wb , use_pc})
|
.out ({use_valid, use_warp_num, use_thread_mask, use_issue_tag, use_address, use_req_fullbyteen, use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd, use_wb, use_pc})
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
|
reg [`NUM_THREADS-1:0] mem_rsp_mask_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [`BYTEEN_BITS-1:0] mem_rsp_fullbyteen_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_all_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [`NW_BITS-1:0] mem_rsp_warp_num_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
|
||||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr;
|
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data;
|
||||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
|
||||||
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
|
||||||
|
|
||||||
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
wire [`ISTAG_BITS-1:0] rsp_issue_tag = dcache_rsp_if.tag[0][`ISTAG_BITS-1:0];
|
||||||
&& (0 == use_req_rw); // only push read requests
|
|
||||||
|
|
||||||
wire mrq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
||||||
|
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
||||||
|
wire [`BYTEEN_BITS-1:0] mem_rsp_fullbyteen = mem_rsp_fullbyteen_buf [rsp_issue_tag];
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_all = mem_rsp_data_all_buf [rsp_issue_tag];
|
||||||
|
wire [`NW_BITS-1:0] mem_rsp_warp_num = mem_rsp_warp_num_buf [rsp_issue_tag];
|
||||||
|
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
||||||
|
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
||||||
|
|
||||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_read_addr = dcache_rsp_if.tag[0][`LOG2UP(`DCREQ_SIZE)-1:0];
|
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[mrq_read_addr] & ~dcache_rsp_if.valid;
|
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
||||||
|
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
|
|
||||||
|
|
||||||
VX_index_queue #(
|
|
||||||
.DATAW (32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
|
||||||
.SIZE (`DCREQ_SIZE)
|
|
||||||
) mem_req_queue (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.write_data ({use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
|
|
||||||
.write_addr (mrq_write_addr),
|
|
||||||
.push (mrq_push),
|
|
||||||
.full (mrq_full),
|
|
||||||
.pop (mrq_pop),
|
|
||||||
.read_addr (mrq_read_addr),
|
|
||||||
.read_data ({lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
|
|
||||||
`UNUSED_PIN (empty)
|
|
||||||
);
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mrq_push) begin
|
if (dcache_req_fire && (0 == use_req_rw)) begin
|
||||||
mem_rsp_mask[mrq_write_addr] <= use_valid;
|
mem_rsp_mask_buf[use_issue_tag] <= use_thread_mask;
|
||||||
|
mem_rsp_offset_buf[use_issue_tag] <= use_req_offset;
|
||||||
|
mem_rsp_fullbyteen_buf[use_issue_tag] <= use_req_fullbyteen;
|
||||||
|
mem_rsp_data_all_buf[use_issue_tag] <= 0;
|
||||||
|
mem_rsp_warp_num_buf[use_issue_tag] <= use_warp_num;
|
||||||
|
mem_rsp_curr_PC_buf[use_issue_tag] <= use_pc;
|
||||||
|
mem_rsp_rd_buf[use_issue_tag] <= use_rd;
|
||||||
end
|
end
|
||||||
if (mrq_pop_part) begin
|
if (dcache_rsp_fire) begin
|
||||||
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd;
|
mem_rsp_mask_buf[rsp_issue_tag] <= mem_rsp_mask_n;
|
||||||
|
mem_rsp_data_all_buf[rsp_issue_tag] <= mem_rsp_data_all | mem_rsp_data;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// Core Request
|
// Core Request
|
||||||
assign dcache_req_if.valid = use_valid & {`NUM_THREADS{~mrq_full}};
|
assign dcache_req_if.valid = {`NUM_THREADS{use_valid && ~store_stalled}} & use_thread_mask;
|
||||||
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
||||||
assign dcache_req_if.byteen = use_req_byteen;
|
assign dcache_req_if.byteen = use_req_byteen;
|
||||||
assign dcache_req_if.addr = use_req_addr;
|
assign dcache_req_if.addr = use_req_addr;
|
||||||
assign dcache_req_if.data = use_req_data;
|
assign dcache_req_if.data = use_req_data;
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
|
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, use_issue_tag};
|
||||||
`else
|
`else
|
||||||
assign dcache_req_if.tag = mrq_write_addr;
|
assign dcache_req_if.tag = use_issue_tag;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Response
|
// Core Response
|
||||||
reg [`NUM_THREADS-1:0][31:0] core_rsp_data;
|
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [15:0] rsp_data_shifted = 16'(dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0});
|
wire [15:0] rsp_data_shifted = 16'(dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0});
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (core_rsp_mem_read)
|
case (mem_rsp_fullbyteen)
|
||||||
`BYTEEN_SB: core_rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
`BYTEEN_SB: mem_rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||||
`BYTEEN_UB: core_rsp_data[i] = 32'(rsp_data_shifted[7:0]);
|
`BYTEEN_UB: mem_rsp_data[i] = 32'(rsp_data_shifted[7:0]);
|
||||||
`BYTEEN_SH: core_rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
`BYTEEN_SH: mem_rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||||
`BYTEEN_UH: core_rsp_data[i] = 32'(rsp_data_shifted[15:0]);
|
`BYTEEN_UH: mem_rsp_data[i] = 32'(rsp_data_shifted[15:0]);
|
||||||
default: core_rsp_data[i] = dcache_rsp_if.data[i];
|
default: mem_rsp_data[i] = dcache_rsp_if.data[i];
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign lsu_commit_if.valid = dcache_rsp_if.valid;
|
wire is_store_rsp = dcache_req_fire && use_req_rw;
|
||||||
assign lsu_commit_if.data = core_rsp_data;
|
wire is_load_rsp = (| dcache_rsp_if.valid) && (0 == mem_rsp_mask_n);
|
||||||
|
|
||||||
|
assign store_stalled = use_req_rw && (~lsu_commit_if.ready
|
||||||
|
|| is_load_rsp); // arbitration prioritizes LOAD
|
||||||
|
|
||||||
|
assign lsu_commit_if.valid = is_load_rsp || is_store_rsp;
|
||||||
|
assign lsu_commit_if.issue_tag = is_load_rsp ? rsp_issue_tag : use_issue_tag;
|
||||||
|
assign lsu_commit_if.data = mem_rsp_data | mem_rsp_data_all;
|
||||||
|
|
||||||
// Can accept new cache response
|
// Can accept new cache response
|
||||||
assign dcache_rsp_if.ready = lsu_commit_if.ready;
|
assign dcache_rsp_if.ready = lsu_commit_if.ready;
|
||||||
|
|
||||||
|
// scope registration
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
|
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_addr, use_address);
|
`SCOPE_ASSIGN(scope_dcache_req_addr, use_address);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_rw, core_req_rw);
|
`SCOPE_ASSIGN(scope_dcache_req_rw, dcache_req_if.rw );
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.byteen);
|
`SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.data);
|
`SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.data);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.tag);
|
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.tag);
|
||||||
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
|
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
|
||||||
|
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||||
|
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
|
||||||
|
|
||||||
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||||
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.data);
|
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||||
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||||
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||||
|
|
||||||
|
`UNUSED_VAR (mem_rsp_warp_num)
|
||||||
|
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||||
|
`UNUSED_VAR (mem_rsp_rd)
|
||||||
|
`UNUSED_VAR (use_wb)
|
||||||
|
|
||||||
`ifdef DBG_PRINT_CORE_DCACHE
|
`ifdef DBG_PRINT_CORE_DCACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||||
$display("%t: D$%0d req: valid=%b, warp=%0d, PC=%0h, addr=%0h, tag=%0h, rw=%0b, rd=%0d, byteen=%0h, data=%0h",
|
$display("%t: D$%0d req: valid=%b, warp=%0d, PC=%0h, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||||
$time, CORE_ID, use_valid, use_warp_num, use_pc, use_address, mrq_write_addr, use_req_rw, use_rd, use_req_byteen, use_req_data);
|
$time, CORE_ID, dcache_req_if.valid, use_warp_num, use_pc, use_address, dcache_req_if.tag, use_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||||
end
|
end
|
||||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||||
$time, CORE_ID, lsu_commit_if.valid, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, mrq_read_addr, lsu_commit_if.rd, lsu_commit_if.data);
|
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_warp_num, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -245,8 +245,8 @@ module VX_mem_unit # (
|
||||||
.SNOOP_FORWARDING (0),
|
.SNOOP_FORWARDING (0),
|
||||||
.DRAM_ENABLE (1),
|
.DRAM_ENABLE (1),
|
||||||
.WRITE_ENABLE (0),
|
.WRITE_ENABLE (0),
|
||||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||||
) icache (
|
) icache (
|
||||||
`SCOPE_SIGNALS_CACHE_UNBIND
|
`SCOPE_SIGNALS_CACHE_UNBIND
|
||||||
|
|
|
@ -7,19 +7,26 @@ module VX_mul_unit #(
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
// Inputs
|
// Inputs
|
||||||
VX_mul_req_if mul_req_if,
|
VX_mul_req_if alu_req_if,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_commit_if mul_commit_if
|
VX_commit_if alu_commit_if
|
||||||
);
|
);
|
||||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
|
||||||
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rem_result;
|
|
||||||
|
|
||||||
wire [`MUL_BITS-1:0] alu_op = mul_req_if.mul_op;
|
wire [`MUL_BITS-1:0] alu_op = alu_req_if.mul_op;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] mul_result, div_result;
|
||||||
|
|
||||||
|
wire stall_mul, stall_div;
|
||||||
|
|
||||||
|
wire is_mul_op = (alu_op == `MUL_MUL);
|
||||||
|
wire is_div_op = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||||
|
|
||||||
|
reg [`NUM_THREADS-1:0] is_div_op_in;
|
||||||
|
wire [`NUM_THREADS-1:0] is_div_op_out;
|
||||||
|
wire is_mul_op_out;
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
||||||
|
@ -28,10 +35,30 @@ module VX_mul_unit #(
|
||||||
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
||||||
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
||||||
|
|
||||||
wire [32:0] div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
reg [32:0] div_in1, div_in2;
|
||||||
wire [32:0] div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
|
||||||
|
|
||||||
VX_mult #(
|
// handle divide by zero
|
||||||
|
always @(*) begin
|
||||||
|
is_div_op_in[i] = is_div_op;
|
||||||
|
div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
||||||
|
div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
||||||
|
|
||||||
|
if (0 == alu_in2[i]) begin
|
||||||
|
if (is_div_op) begin
|
||||||
|
div_in1 = {1'b0, 32'hFFFFFFFF}; // quotient = (0xFFFFFFFF / 1)
|
||||||
|
div_in2 = 1;
|
||||||
|
end else begin
|
||||||
|
is_div_op_in[i] = 1; // remainder = (in1 / 1)
|
||||||
|
div_in2 = 1;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
wire [63:0] mul_result_tmp;
|
||||||
|
wire [31:0] div_result_tmp;
|
||||||
|
wire [31:0] rem_result_tmp;
|
||||||
|
|
||||||
|
VX_multiplier #(
|
||||||
.WIDTHA(33),
|
.WIDTHA(33),
|
||||||
.WIDTHB(33),
|
.WIDTHB(33),
|
||||||
.WIDTHP(64),
|
.WIDTHP(64),
|
||||||
|
@ -40,9 +67,10 @@ module VX_mul_unit #(
|
||||||
) multiplier (
|
) multiplier (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
|
.clk_en(~stall_mul),
|
||||||
.dataa(mul_in1),
|
.dataa(mul_in1),
|
||||||
.datab(mul_in2),
|
.datab(mul_in2),
|
||||||
.result(mul_result[i])
|
.result(mul_result_tmp)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_divide #(
|
VX_divide #(
|
||||||
|
@ -53,75 +81,58 @@ module VX_mul_unit #(
|
||||||
.NSIGNED(1),
|
.NSIGNED(1),
|
||||||
.DSIGNED(1),
|
.DSIGNED(1),
|
||||||
.PIPELINE(`LATENCY_IDIV)
|
.PIPELINE(`LATENCY_IDIV)
|
||||||
) sdiv (
|
) divide (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
|
.clk_en(~stall_div),
|
||||||
.numer(div_in1),
|
.numer(div_in1),
|
||||||
.denom(div_in2),
|
.denom(div_in2),
|
||||||
.quotient(div_result[i]),
|
.quotient(div_result_tmp),
|
||||||
.remainder(rem_result[i])
|
.remainder(rem_result_tmp)
|
||||||
);
|
);
|
||||||
|
|
||||||
always @(*) begin
|
assign mul_result[i] = is_mul_op_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||||
case (alu_op)
|
assign div_result[i] = is_div_op_out[i] ? div_result_tmp : rem_result_tmp;
|
||||||
`MUL_MUL: alu_result[i] = mul_result[i][31:0];
|
|
||||||
`MUL_MULH,
|
|
||||||
`MUL_MULHSU,
|
|
||||||
`MUL_MULHU: alu_result[i] = mul_result[i][63:32];
|
|
||||||
`MUL_DIV,
|
|
||||||
`MUL_DIVU: alu_result[i] = (alu_in2[i] == 0) ? 32'hffffffff : div_result[i];
|
|
||||||
`MUL_REM,
|
|
||||||
`MUL_REMU: alu_result[i] = (alu_in2[i] == 0) ? alu_in1[i] : rem_result[i];
|
|
||||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC, FENCE
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall;
|
wire mul_valid_out;
|
||||||
|
wire div_valid_out;
|
||||||
|
|
||||||
reg result_avail;
|
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||||
reg [4:0] pending_ctr;
|
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||||
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `LATENCY_IDIV : `LATENCY_IMUL;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
VX_shift_register #(
|
||||||
if (reset) begin
|
.DATAW(1 + `ISTAG_BITS + 1),
|
||||||
result_avail <= 0;
|
.DEPTH(`LATENCY_IMUL)
|
||||||
pending_ctr <= 0;
|
) mul_delay (
|
||||||
end else begin
|
.clk(clk),
|
||||||
if (result_avail && !stall) begin
|
.reset(reset),
|
||||||
result_avail <= 0;
|
.enable(~stall_mul),
|
||||||
pending_ctr <= 0;
|
.in({alu_req_if.valid && ~`IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_mul_op}),
|
||||||
end
|
.out({mul_valid_out, mul_issue_tag, is_mul_op_out})
|
||||||
if ((| mul_req_if.valid) && (pending_ctr == 0)) begin
|
|
||||||
pending_ctr <= instr_delay - 1;
|
|
||||||
if (instr_delay == 1)
|
|
||||||
result_avail <= 1;
|
|
||||||
end else if (pending_ctr != 0) begin
|
|
||||||
pending_ctr <= pending_ctr - 1;
|
|
||||||
if (pending_ctr == 1)
|
|
||||||
result_avail <= 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
|
||||||
|
|
||||||
assign stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
|
||||||
|| pipeline_stall;
|
|
||||||
|
|
||||||
wire flush = mul_commit_if.ready && pipeline_stall;
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
|
||||||
) mul_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (stall),
|
|
||||||
.flush (flush),
|
|
||||||
.in ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb, alu_result}),
|
|
||||||
.out ({mul_commit_if.valid, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
|
||||||
);
|
);
|
||||||
|
|
||||||
assign mul_req_if.ready = ~stall;
|
VX_shift_register #(
|
||||||
|
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
||||||
|
.DEPTH(`LATENCY_IDIV)
|
||||||
|
) div_delay (
|
||||||
|
.clk(clk),
|
||||||
|
.reset(reset),
|
||||||
|
.enable(~stall_div),
|
||||||
|
.in({alu_req_if.valid && `IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_div_op_in}),
|
||||||
|
.out({div_valid_out, div_issue_tag, is_div_op_out})
|
||||||
|
);
|
||||||
|
|
||||||
|
wire stall_out = (~alu_commit_if.ready && alu_commit_if.valid);
|
||||||
|
assign stall_mul = stall_out;
|
||||||
|
assign stall_div = stall_out
|
||||||
|
|| (mul_valid_out && div_valid_out); // arbitration prioritizes MUL
|
||||||
|
|
||||||
|
// can accept new request?
|
||||||
|
assign alu_req_if.ready = ~(stall_mul || stall_div);
|
||||||
|
|
||||||
|
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
||||||
|
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
||||||
|
assign alu_commit_if.data = mul_valid_out ? mul_result : div_result;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
|
@ -113,13 +113,14 @@ module VX_pipeline #(
|
||||||
VX_fpu_req_if fpu_req_if();
|
VX_fpu_req_if fpu_req_if();
|
||||||
VX_gpu_req_if gpu_req_if();
|
VX_gpu_req_if gpu_req_if();
|
||||||
VX_wb_if writeback_if();
|
VX_wb_if writeback_if();
|
||||||
|
VX_commit_is_if commit_is_if();
|
||||||
VX_wstall_if wstall_if();
|
VX_wstall_if wstall_if();
|
||||||
VX_join_if join_if();
|
VX_join_if join_if();
|
||||||
VX_commit_if alu_commit_if();
|
VX_commit_if alu_commit_if();
|
||||||
VX_commit_if lsu_commit_if();
|
VX_commit_if lsu_commit_if();
|
||||||
VX_commit_if csr_commit_if();
|
VX_commit_if csr_commit_if();
|
||||||
VX_commit_if mul_commit_if();
|
VX_commit_if mul_commit_if();
|
||||||
VX_commit_fp_if fpu_commit_if();
|
VX_commit_if fpu_commit_if();
|
||||||
VX_commit_if gpu_commit_if();
|
VX_commit_if gpu_commit_if();
|
||||||
|
|
||||||
VX_fetch #(
|
VX_fetch #(
|
||||||
|
@ -156,6 +157,7 @@ module VX_pipeline #(
|
||||||
|
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
|
.commit_is_if (commit_is_if),
|
||||||
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
|
@ -212,6 +214,7 @@ module VX_pipeline #(
|
||||||
.fpu_commit_if (fpu_commit_if),
|
.fpu_commit_if (fpu_commit_if),
|
||||||
.gpu_commit_if (gpu_commit_if),
|
.gpu_commit_if (gpu_commit_if),
|
||||||
|
|
||||||
|
.commit_is_if (commit_is_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.perf_cntrs_if (perf_cntrs_if)
|
.perf_cntrs_if (perf_cntrs_if)
|
||||||
);
|
);
|
||||||
|
|
|
@ -8,6 +8,7 @@ module VX_scheduler #(
|
||||||
|
|
||||||
VX_decode_if decode_if,
|
VX_decode_if decode_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
|
VX_commit_is_if commit_is_if,
|
||||||
input wire gpr_busy,
|
input wire gpr_busy,
|
||||||
input wire alu_busy,
|
input wire alu_busy,
|
||||||
input wire lsu_busy,
|
input wire lsu_busy,
|
||||||
|
@ -15,29 +16,44 @@ module VX_scheduler #(
|
||||||
input wire mul_busy,
|
input wire mul_busy,
|
||||||
input wire fpu_busy,
|
input wire fpu_busy,
|
||||||
input wire gpu_busy,
|
input wire gpu_busy,
|
||||||
|
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||||
output wire schedule_delay,
|
output wire schedule_delay,
|
||||||
output wire is_empty
|
output wire is_empty
|
||||||
);
|
);
|
||||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
|
`ifdef EXT_F_ENABLE
|
||||||
reg busy_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
|
localparam NREGS = (`NUM_REGS * 2);
|
||||||
|
reg inuse_table [`NUM_WARPS-1:0][NREGS-1:0];
|
||||||
|
wire [`NR_BITS:0] read_rs1 = {decode_if.rs1_is_fp, decode_if.rs1};
|
||||||
|
wire [`NR_BITS:0] read_rs2 = {decode_if.rs2_is_fp, decode_if.rs2};
|
||||||
|
wire [`NR_BITS:0] read_rs3 = {1'b1, decode_if.rs3};
|
||||||
|
wire [`NR_BITS:0] read_rd = {decode_if.rd_is_fp, decode_if.rd};
|
||||||
|
wire [`NR_BITS:0] write_rd = {writeback_if.rd_is_fp, writeback_if.rd};
|
||||||
|
wire rs3_inuse = inuse_table[decode_if.warp_num][read_rs3];
|
||||||
|
`else
|
||||||
|
localparam NREGS = `NUM_REGS;
|
||||||
|
reg inuse_table [`NUM_WARPS-1:0][NREGS-1:0];
|
||||||
|
wire [`NR_BITS-1:0] read_rs1 = decode_if.rs1;
|
||||||
|
wire [`NR_BITS-1:0] read_rs2 = decode_if.rs2;
|
||||||
|
wire [`NR_BITS-1:0] read_rd = decode_if.rd;
|
||||||
|
wire [`NR_BITS-1:0] write_rd = writeback_if.rd;
|
||||||
|
wire rs3_inuse = 0;
|
||||||
|
`endif
|
||||||
|
|
||||||
|
reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][NREGS-1:0];
|
||||||
reg [CTVW-1:0] count_valid;
|
reg [CTVW-1:0] count_valid;
|
||||||
|
|
||||||
reg [`NR_BITS:0] read_rd = {decode_if.rd_is_fp, decode_if.rd};
|
wire rs1_inuse = inuse_table[decode_if.warp_num][read_rs1];
|
||||||
reg [`NR_BITS:0] write_rd = {writeback_if.rd_is_fp, writeback_if.rd};
|
wire rs2_inuse = inuse_table[decode_if.warp_num][read_rs2];
|
||||||
|
wire rd_inuse = inuse_table[decode_if.warp_num][read_rd];
|
||||||
|
|
||||||
wire rs1_rename = busy_table[decode_if.warp_num][{decode_if.rs1_is_fp, decode_if.rs1}];
|
wire rs1_inuse_qual = rs1_inuse && decode_if.use_rs1;
|
||||||
wire rs2_rename = busy_table[decode_if.warp_num][{decode_if.rs1_is_fp, decode_if.rs2}];
|
wire rs2_inuse_qual = rs2_inuse && decode_if.use_rs2;
|
||||||
wire rs3_rename = busy_table[decode_if.warp_num][{1'b1, decode_if.rs3}];
|
wire rs3_inuse_qual = rs3_inuse && decode_if.use_rs3;
|
||||||
wire rd_rename = busy_table[decode_if.warp_num][read_rd];
|
wire rd_inuse_qual = rd_inuse && decode_if.wb;
|
||||||
|
|
||||||
wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
|
wire rename_valid = (rs1_inuse_qual || rs2_inuse_qual || rs3_inuse_qual || rd_inuse_qual);
|
||||||
wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
|
|
||||||
wire rs3_rename_qual = rs3_rename && decode_if.use_rs3;
|
|
||||||
wire rd_rename_qual = rd_rename && decode_if.wb;
|
|
||||||
|
|
||||||
wire rename_valid = (rs1_rename_qual || rs2_rename_qual || rs3_rename_qual || rd_rename_qual);
|
|
||||||
|
|
||||||
wire ex_stalled = ((gpr_busy)
|
wire ex_stalled = ((gpr_busy)
|
||||||
|| (alu_busy && (decode_if.ex_type == `EX_ALU))
|
|| (alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||||
|
@ -47,41 +63,61 @@ module VX_scheduler #(
|
||||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
||||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
|
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
|
||||||
|
|
||||||
wire stall = (ex_stalled || rename_valid) && (| decode_if.valid);
|
wire iq_full;
|
||||||
|
|
||||||
wire acquire_rd = (| decode_if.valid) && (decode_if.wb != 0) && ~stall;
|
wire stall = (ex_stalled || rename_valid || iq_full) && decode_if.valid;
|
||||||
|
|
||||||
wire release_rd = (| writeback_if.valid);
|
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][write_rd] & ~writeback_if.valid;
|
wire release_rd = writeback_if.valid;
|
||||||
|
|
||||||
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
|
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][write_rd] & ~writeback_if.thread_mask;
|
||||||
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
|
|
||||||
|
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == inuse_registers_n))) ? (count_valid + 1) :
|
||||||
|
(~acquire_rd && (release_rd && (0 == inuse_registers_n))) ? (count_valid - 1) :
|
||||||
count_valid;
|
count_valid;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
integer i, w;
|
integer i, w;
|
||||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
for (w = 0; w < `NUM_WARPS; w++) begin
|
||||||
for (i = 0; i < 32; i++) begin
|
for (i = 0; i < NREGS; i++) begin
|
||||||
rename_table[w][i] <= 0;
|
inuse_registers[w][i] <= 0;
|
||||||
busy_table[w][i] <= 0;
|
inuse_table[w][i] <= 0;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
count_valid <= 0;
|
count_valid <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (acquire_rd) begin
|
if (acquire_rd) begin
|
||||||
rename_table[decode_if.warp_num][read_rd] <= decode_if.valid;
|
inuse_registers[decode_if.warp_num][read_rd] <= decode_if.thread_mask;
|
||||||
busy_table[decode_if.warp_num][read_rd] <= 1;
|
inuse_table[decode_if.warp_num][read_rd] <= 1;
|
||||||
end
|
end
|
||||||
if (release_rd) begin
|
if (release_rd) begin
|
||||||
assert(rename_table[writeback_if.warp_num][write_rd] != 0);
|
assert(inuse_table[writeback_if.warp_num][write_rd] != 0);
|
||||||
rename_table[writeback_if.warp_num][write_rd] <= valid_wb_new_mask;
|
inuse_registers[writeback_if.warp_num][write_rd] <= inuse_registers_n;
|
||||||
busy_table[writeback_if.warp_num][write_rd] <= (| valid_wb_new_mask);
|
inuse_table[writeback_if.warp_num][write_rd] <= (| inuse_registers_n);
|
||||||
end
|
end
|
||||||
count_valid <= count_valid_next;
|
count_valid <= count_valid_next;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire ib_acquire = decode_if.valid && ~stall;
|
||||||
|
|
||||||
|
VX_cam_buffer #(
|
||||||
|
.DATAW ($bits(is_data_t)),
|
||||||
|
.SIZE (`ISSUEQ_SIZE),
|
||||||
|
.RPORTS (`NUM_EXS)
|
||||||
|
) issue_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rd_is_fp, decode_if.wb}),
|
||||||
|
.write_addr (issue_tag),
|
||||||
|
.acquire_slot (ib_acquire),
|
||||||
|
.release_slot ({commit_is_if.alu_valid, commit_is_if.lsu_valid, commit_is_if.csr_valid, commit_is_if.mul_valid, commit_is_if.fpu_valid, commit_is_if.gpu_valid}),
|
||||||
|
.read_addr ({commit_is_if.alu_tag, commit_is_if.lsu_tag, commit_is_if.csr_tag, commit_is_if.mul_tag, commit_is_if.fpu_tag, commit_is_if.gpu_tag}),
|
||||||
|
.read_data ({commit_is_if.alu_data, commit_is_if.lsu_data, commit_is_if.csr_data, commit_is_if.mul_data, commit_is_if.fpu_data, commit_is_if.gpu_data}),
|
||||||
|
.full (iq_full)
|
||||||
|
);
|
||||||
|
|
||||||
assign decode_if.ready = ~stall;
|
assign decode_if.ready = ~stall;
|
||||||
|
|
||||||
assign schedule_delay = stall;
|
assign schedule_delay = stall;
|
||||||
|
@ -91,7 +127,7 @@ module VX_scheduler #(
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (stall) begin
|
if (stall) begin
|
||||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, iq_full=%b, inuse=%b%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, iq_full, rd_inuse_qual, rs1_inuse_qual, rs2_inuse_qual, rs3_inuse_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -97,9 +97,9 @@ module VX_warp_sched #(
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
if (warp_ctl_if.wspawn) begin
|
if (warp_ctl_if.wspawn) begin
|
||||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
warp_active <= warp_ctl_if.wspawn_wmask;
|
||||||
|
use_wspawn <= warp_ctl_if.wspawn_wmask & (~`NUM_WARPS'(1));
|
||||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'(1));
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if (warp_ctl_if.is_barrier) begin
|
if (warp_ctl_if.is_barrier) begin
|
||||||
|
@ -112,6 +112,10 @@ module VX_warp_sched #(
|
||||||
end else if (warp_ctl_if.change_mask) begin
|
end else if (warp_ctl_if.change_mask) begin
|
||||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
||||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||||
|
if (0 == warp_ctl_if.thread_mask) begin
|
||||||
|
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||||
|
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||||
|
end
|
||||||
end else if (join_if.is_join && !didnt_split) begin
|
end else if (join_if.is_join && !didnt_split) begin
|
||||||
if (!join_fall) begin
|
if (!join_fall) begin
|
||||||
warp_pcs[join_if.warp_num] <= join_pc;
|
warp_pcs[join_if.warp_num] <= join_pc;
|
||||||
|
@ -128,11 +132,6 @@ module VX_warp_sched #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if (warp_ctl_if.whalt) begin
|
|
||||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
|
||||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
|
||||||
end
|
|
||||||
|
|
||||||
if (update_use_wspawn) begin
|
if (update_use_wspawn) begin
|
||||||
use_wspawn[warp_to_schedule] <= 0;
|
use_wspawn[warp_to_schedule] <= 0;
|
||||||
thread_masks[warp_to_schedule] <= 1;
|
thread_masks[warp_to_schedule] <= 1;
|
||||||
|
@ -167,7 +166,7 @@ module VX_warp_sched #(
|
||||||
if (scheduled_warp && !stall) begin
|
if (scheduled_warp && !stall) begin
|
||||||
warp_lock[warp_num] <= 1;
|
warp_lock[warp_num] <= 1;
|
||||||
end
|
end
|
||||||
if ((| ifetch_rsp_if.valid) && ifetch_rsp_if.ready) begin
|
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -192,7 +191,7 @@ module VX_warp_sched #(
|
||||||
|
|
||||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
||||||
|
|
||||||
assign reached_barrier_limit = (b_count == warp_ctl_if.num_warps);
|
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier_num_warps);
|
||||||
|
|
||||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
||||||
|
|
||||||
|
@ -263,17 +262,17 @@ module VX_warp_sched #(
|
||||||
`UNUSED_PIN (grant_onehot)
|
`UNUSED_PIN (grant_onehot)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign stall = ~ifetch_req_if.ready && (| ifetch_req_if.valid);
|
assign stall = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + 32 + `NW_BITS)
|
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||||
) fetch_reg (
|
) fetch_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({thread_mask, warp_pc, warp_num}),
|
.in ({(| thread_mask), thread_mask, warp_pc, warp_num}),
|
||||||
.out ({ifetch_req_if.valid, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign busy = (warp_active != 0);
|
assign busy = (warp_active != 0);
|
||||||
|
|
|
@ -10,79 +10,87 @@ module VX_writeback #(
|
||||||
VX_commit_if alu_commit_if,
|
VX_commit_if alu_commit_if,
|
||||||
VX_commit_if lsu_commit_if,
|
VX_commit_if lsu_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
VX_commit_if mul_commit_if,
|
||||||
VX_commit_fp_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
|
VX_commit_is_if commit_is_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_wb_if writeback_if
|
VX_wb_if writeback_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire alu_valid = (| alu_commit_if.valid) && alu_commit_if.wb;
|
wire alu_valid = alu_commit_if.valid && commit_is_if.alu_data.wb;
|
||||||
wire lsu_valid = (| lsu_commit_if.valid) && lsu_commit_if.wb;
|
wire lsu_valid = lsu_commit_if.valid && commit_is_if.lsu_data.wb;
|
||||||
wire csr_valid = (| csr_commit_if.valid) && csr_commit_if.wb;
|
wire csr_valid = csr_commit_if.valid && commit_is_if.csr_data.wb;
|
||||||
wire mul_valid = (| mul_commit_if.valid) && mul_commit_if.wb;
|
wire mul_valid = mul_commit_if.valid && commit_is_if.mul_data.wb;
|
||||||
wire fpu_valid = (| fpu_commit_if.valid) && fpu_commit_if.wb;
|
wire fpu_valid = fpu_commit_if.valid && commit_is_if.fpu_data.wb;
|
||||||
|
|
||||||
VX_wb_if writeback_tmp_if();
|
VX_wb_if writeback_tmp_if();
|
||||||
|
|
||||||
assign writeback_tmp_if.valid = lsu_valid ? lsu_commit_if.valid :
|
assign writeback_tmp_if.valid = alu_valid ? alu_commit_if.valid :
|
||||||
fpu_valid ? fpu_commit_if.valid :
|
lsu_valid ? lsu_commit_if.valid :
|
||||||
mul_valid ? mul_commit_if.valid :
|
|
||||||
alu_valid ? alu_commit_if.valid :
|
|
||||||
csr_valid ? csr_commit_if.valid :
|
csr_valid ? csr_commit_if.valid :
|
||||||
|
mul_valid ? mul_commit_if.valid :
|
||||||
|
fpu_valid ? fpu_commit_if.valid :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.warp_num = lsu_valid ? lsu_commit_if.warp_num :
|
assign writeback_tmp_if.warp_num = alu_valid ? commit_is_if.alu_data.warp_num :
|
||||||
fpu_valid ? fpu_commit_if.warp_num :
|
lsu_valid ? commit_is_if.lsu_data.warp_num :
|
||||||
mul_valid ? mul_commit_if.warp_num :
|
csr_valid ? commit_is_if.csr_data.warp_num :
|
||||||
alu_valid ? alu_commit_if.warp_num :
|
mul_valid ? commit_is_if.mul_data.warp_num :
|
||||||
csr_valid ? csr_commit_if.warp_num :
|
fpu_valid ? commit_is_if.fpu_data.warp_num :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.rd = lsu_valid ? lsu_commit_if.rd :
|
assign writeback_tmp_if.thread_mask = alu_valid ? commit_is_if.alu_data.thread_mask :
|
||||||
fpu_valid ? fpu_commit_if.rd :
|
lsu_valid ? commit_is_if.lsu_data.thread_mask :
|
||||||
mul_valid ? mul_commit_if.rd :
|
csr_valid ? commit_is_if.csr_data.thread_mask :
|
||||||
alu_valid ? alu_commit_if.rd :
|
mul_valid ? commit_is_if.mul_data.thread_mask :
|
||||||
csr_valid ? csr_commit_if.rd :
|
fpu_valid ? commit_is_if.fpu_data.thread_mask :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.rd_is_fp = lsu_valid ? 0 :
|
assign writeback_tmp_if.rd = alu_valid ? commit_is_if.alu_data.rd :
|
||||||
fpu_valid ? fpu_commit_if.rd_is_fp :
|
lsu_valid ? commit_is_if.lsu_data.rd :
|
||||||
mul_valid ? 0 :
|
csr_valid ? commit_is_if.csr_data.rd :
|
||||||
alu_valid ? 0 :
|
mul_valid ? commit_is_if.mul_data.rd :
|
||||||
|
fpu_valid ? commit_is_if.fpu_data.rd :
|
||||||
|
0;
|
||||||
|
|
||||||
|
assign writeback_tmp_if.rd_is_fp = alu_valid ? 0 :
|
||||||
|
lsu_valid ? 0 :
|
||||||
csr_valid ? 0 :
|
csr_valid ? 0 :
|
||||||
|
mul_valid ? 0 :
|
||||||
|
fpu_valid ? commit_is_if.fpu_data.rd_is_fp :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
|
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
|
||||||
fpu_valid ? fpu_commit_if.data :
|
lsu_valid ? lsu_commit_if.data :
|
||||||
mul_valid ? mul_commit_if.data :
|
|
||||||
alu_valid ? alu_commit_if.data :
|
|
||||||
csr_valid ? csr_commit_if.data :
|
csr_valid ? csr_commit_if.data :
|
||||||
|
mul_valid ? mul_commit_if.data :
|
||||||
|
fpu_valid ? fpu_commit_if.data :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32) + 1)
|
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1)
|
||||||
) wb_reg (
|
) wb_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
|
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
|
||||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
|
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.thread_mask, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign lsu_commit_if.ready = !stall;
|
assign alu_commit_if.ready = !stall;
|
||||||
assign fpu_commit_if.ready = !stall && !lsu_valid;
|
assign lsu_commit_if.ready = !stall && !alu_valid;
|
||||||
assign mul_commit_if.ready = !stall && !lsu_valid && !fpu_valid;
|
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;
|
||||||
assign alu_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid;
|
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
|
||||||
assign csr_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid && !alu_valid;
|
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
|
||||||
|
|
||||||
// special workaround to control RISC-V benchmarks termination on Verilator
|
// special workaround to control RISC-V benchmarks termination on Verilator
|
||||||
reg [31:0] last_data_wb /* verilator public */;
|
reg [31:0] last_data_wb /* verilator public */;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| writeback_tmp_if.valid) && ~stall && (writeback_tmp_if.rd == 28)) begin
|
if (writeback_tmp_if.valid && ~stall && (writeback_tmp_if.rd == 28)) begin
|
||||||
last_data_wb <= writeback_tmp_if.data[0];
|
last_data_wb <= writeback_tmp_if.data[0];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
6
hw/rtl/cache/VX_bank.v
vendored
6
hw/rtl/cache/VX_bank.v
vendored
|
@ -513,8 +513,8 @@ module VX_bank #(
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_bank_pipe),
|
.stall (stall_bank_pipe),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
.in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
|
||||||
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
|
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
|
@ -587,7 +587,7 @@ module VX_bank #(
|
||||||
// Broadcast
|
// Broadcast
|
||||||
.is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]),
|
.is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]),
|
||||||
.fill_addr_st1 (addr_st1e),
|
.fill_addr_st1 (addr_st1e),
|
||||||
.pending_hazard (mrvq_pending_hazard_st1e),
|
.pending_hazard_st1 (mrvq_pending_hazard_st1e),
|
||||||
|
|
||||||
// Dequeue
|
// Dequeue
|
||||||
.miss_resrv_pop (mrvq_pop),
|
.miss_resrv_pop (mrvq_pop),
|
||||||
|
|
2
hw/rtl/cache/VX_cache.v
vendored
2
hw/rtl/cache/VX_cache.v
vendored
|
@ -133,7 +133,7 @@ module VX_cache #(
|
||||||
wire debug_core_req_wb;
|
wire debug_core_req_wb;
|
||||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
||||||
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
|
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
||||||
/* verilator lint_on UNUSED */
|
/* verilator lint_on UNUSED */
|
||||||
|
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
|
|
4
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
4
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -41,7 +41,7 @@ module VX_cache_miss_resrv #(
|
||||||
input wire is_fill_st1,
|
input wire is_fill_st1,
|
||||||
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
|
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
|
||||||
|
|
||||||
output wire pending_hazard,
|
output wire pending_hazard_st1,
|
||||||
|
|
||||||
// Miss dequeue
|
// Miss dequeue
|
||||||
input wire miss_resrv_pop,
|
input wire miss_resrv_pop,
|
||||||
|
@ -84,7 +84,7 @@ module VX_cache_miss_resrv #(
|
||||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
assign pending_hazard = |(valid_address_match);
|
assign pending_hazard_st1 = |(valid_address_match);
|
||||||
|
|
||||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
||||||
|
|
23
hw/rtl/cache/VX_snp_forwarder.v
vendored
23
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -41,8 +41,8 @@ module VX_snp_forwarder #(
|
||||||
|
|
||||||
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
|
||||||
|
|
||||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
|
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr;
|
||||||
wire sfq_push, sfq_pop, sfq_full;
|
wire sfq_acquire, sfq_release, sfq_full;
|
||||||
|
|
||||||
wire fwdin_valid;
|
wire fwdin_valid;
|
||||||
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
|
wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag;
|
||||||
|
@ -56,27 +56,26 @@ module VX_snp_forwarder #(
|
||||||
|
|
||||||
assign sfq_read_addr = fwdin_tag;
|
assign sfq_read_addr = fwdin_tag;
|
||||||
|
|
||||||
assign sfq_push = snp_req_valid && !sfq_full && fwdout_ready;
|
assign sfq_acquire = snp_req_valid && !sfq_full && fwdout_ready;
|
||||||
assign sfq_pop = snp_rsp_valid;
|
assign sfq_release = snp_rsp_valid;
|
||||||
|
|
||||||
VX_index_queue #(
|
VX_cam_buffer #(
|
||||||
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
|
.DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
|
||||||
.SIZE (SNRQ_SIZE)
|
.SIZE (SNRQ_SIZE)
|
||||||
) snp_fwd_queue (
|
) snp_fwd_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||||
.write_addr (sfq_write_addr),
|
.write_addr (sfq_write_addr),
|
||||||
.push (sfq_push),
|
.acquire_slot (sfq_acquire),
|
||||||
.pop (sfq_pop),
|
.release_slot (sfq_release),
|
||||||
.full (sfq_full),
|
|
||||||
.read_addr (sfq_read_addr),
|
.read_addr (sfq_read_addr),
|
||||||
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||||
`UNUSED_PIN (empty)
|
.full (sfq_full)
|
||||||
);
|
);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (sfq_push) begin
|
if (sfq_acquire) begin
|
||||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
|
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS;
|
||||||
end
|
end
|
||||||
if (fwdin_fire) begin
|
if (fwdin_fire) begin
|
||||||
|
|
34
hw/rtl/cache/VX_tag_data_access.v
vendored
34
hw/rtl/cache/VX_tag_data_access.v
vendored
|
@ -21,6 +21,7 @@ module VX_tag_data_access #(
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
input wire stall,
|
input wire stall,
|
||||||
input wire is_snp_st1e,
|
input wire is_snp_st1e,
|
||||||
input wire snp_invalidate_st1e,
|
input wire snp_invalidate_st1e,
|
||||||
|
@ -78,17 +79,17 @@ module VX_tag_data_access #(
|
||||||
wire tags_match;
|
wire tags_match;
|
||||||
|
|
||||||
wire real_writefill = valid_req_st1e && writefill_st1e
|
wire real_writefill = valid_req_st1e && writefill_st1e
|
||||||
&& ((!use_read_valid_st1e) || (use_read_valid_st1e && !tags_match));
|
&& ((~use_read_valid_st1e) || (use_read_valid_st1e && ~tags_match));
|
||||||
|
|
||||||
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
|
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
|
||||||
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
|
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
|
||||||
|
|
||||||
VX_tag_data_structure #(
|
VX_tag_data_store #(
|
||||||
.CACHE_SIZE (CACHE_SIZE),
|
.CACHE_SIZE (CACHE_SIZE),
|
||||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||||
.NUM_BANKS (NUM_BANKS),
|
.NUM_BANKS (NUM_BANKS),
|
||||||
.WORD_SIZE (WORD_SIZE)
|
.WORD_SIZE (WORD_SIZE)
|
||||||
) tag_data_structure (
|
) tag_data_store (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall_bank_pipe(stall_bank_pipe),
|
.stall_bank_pipe(stall_bank_pipe),
|
||||||
|
@ -141,10 +142,12 @@ module VX_tag_data_access #(
|
||||||
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
|
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
|
||||||
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||||
|
|
||||||
|
for (i = 0; i < WORD_SIZE; i++) begin
|
||||||
if (`WORD_SELECT_WIDTH != 0) begin
|
if (`WORD_SELECT_WIDTH != 0) begin
|
||||||
assign readword_st1e = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH][i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||||
end else begin
|
end else begin
|
||||||
assign readword_st1e = use_read_data_st1e;
|
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
|
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
|
||||||
|
@ -153,9 +156,9 @@ module VX_tag_data_access #(
|
||||||
wire should_write = mem_rw_st1e
|
wire should_write = mem_rw_st1e
|
||||||
&& valid_req_st1e
|
&& valid_req_st1e
|
||||||
&& use_read_valid_st1e
|
&& use_read_valid_st1e
|
||||||
&& !miss_st1e
|
&& ~miss_st1e
|
||||||
&& !is_snp_st1e
|
&& ~is_snp_st1e
|
||||||
&& !real_writefill;
|
&& ~real_writefill;
|
||||||
|
|
||||||
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||||
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||||
|
@ -168,22 +171,22 @@ module VX_tag_data_access #(
|
||||||
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e;
|
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e;
|
||||||
end
|
end
|
||||||
|
|
||||||
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
|
assign use_write_enable = (writefill_st1e && ~real_writefill) ? 0 : we;
|
||||||
assign use_write_data = data_write;
|
assign use_write_data = data_write;
|
||||||
|
|
||||||
// use "case equality" to handle uninitialized tag when block entry is not valid
|
// use "case equality" to handle uninitialized tag when block entry is not valid
|
||||||
assign tags_match = (writetag_st1e === use_read_tag_st1e);
|
assign tags_match = (writetag_st1e === use_read_tag_st1e);
|
||||||
|
|
||||||
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
|
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && ~force_request_miss_st1e;
|
||||||
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
|
wire req_invalid = valid_req_st1e && ~is_snp_st1e && ~use_read_valid_st1e && ~writefill_st1e;
|
||||||
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match;
|
wire req_miss = valid_req_st1e && ~is_snp_st1e && use_read_valid_st1e && ~writefill_st1e && ~tags_match;
|
||||||
wire real_miss = req_invalid || req_miss;
|
wire real_miss = req_invalid || req_miss;
|
||||||
wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss);
|
wire force_core_miss = (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e && ~real_miss);
|
||||||
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
|
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
|
||||||
|
|
||||||
// The second term is basically saying: always make an entry ready if there's already another entry waiting, even if you yourself see a miss
|
// The second term is basically saying: always make an entry ready if there's already another entry waiting, even if you yourself see a miss
|
||||||
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|
||||||
|| (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
|
|| (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e);
|
||||||
|
|
||||||
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
|
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
|
||||||
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
|
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
|
||||||
|
@ -195,6 +198,3 @@ module VX_tag_data_access #(
|
||||||
assign invalidate_line = snoop_hit_no_pending;
|
assign invalidate_line = snoop_hit_no_pending;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
`include "VX_cache_config.vh"
|
`include "VX_cache_config.vh"
|
||||||
|
|
||||||
module VX_tag_data_structure #(
|
module VX_tag_data_store #(
|
||||||
// Size of cache in bytes
|
// Size of cache in bytes
|
||||||
parameter CACHE_SIZE = 0,
|
parameter CACHE_SIZE = 0,
|
||||||
// Size of line inside a bank in bytes
|
// Size of line inside a bank in bytes
|
|
@ -5,15 +5,14 @@
|
||||||
|
|
||||||
interface VX_alu_req_if ();
|
interface VX_alu_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire [`ALU_BITS-1:0] alu_op;
|
wire [`ALU_BITS-1:0] alu_op;
|
||||||
|
|
||||||
wire wb;
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
|
|
||||||
|
|
|
@ -5,12 +5,9 @@
|
||||||
|
|
||||||
interface VX_commit_if ();
|
interface VX_commit_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [31:0] curr_PC;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
wire wb;
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
43
hw/rtl/interfaces/VX_commit_is_if.v
Normal file
43
hw/rtl/interfaces/VX_commit_is_if.v
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
`ifndef VX_COMMIT_IS_IF
`define VX_COMMIT_IS_IF

`include "VX_define.vh"

// Packed record carried on the *_data members of VX_commit_is_if.
// Field order defines the packed bit layout (first field is most
// significant), so it must not be rearranged.
// NOTE(review): presumably the per-instruction metadata kept by the issue
// stage and looked up by issue tag at commit -- confirm against the users.
typedef struct packed {
    logic [`NW_BITS-1:0]     warp_num;
    logic [`NUM_THREADS-1:0] thread_mask;
    logic [31:0]             curr_PC;
    logic [`NR_BITS-1:0]     rd;
    logic                    rd_is_fp;
    logic                    wb;
} is_data_t;

// Signal bundle with one {valid, tag, data} triple for each of the
// prefixes alu, lsu, csr, mul, fpu and gpu.
interface VX_commit_is_if ();

    // One valid flag per prefix.
    wire alu_valid, lsu_valid, csr_valid, mul_valid, fpu_valid, gpu_valid;

    // One `ISTAG_BITS-wide tag per prefix.
    wire [`ISTAG_BITS-1:0] alu_tag, lsu_tag, csr_tag, mul_tag, fpu_tag, gpu_tag;

    // One is_data_t record per prefix; the declarations stay inside the
    // project's warning-suppression macros exactly as before.
`IGNORE_WARNINGS_BEGIN
    is_data_t alu_data, lsu_data, csr_data, mul_data, fpu_data, gpu_data;
`IGNORE_WARNINGS_END

endinterface

`endif
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
interface VX_csr_req_if ();
|
interface VX_csr_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,9 @@
|
||||||
|
|
||||||
interface VX_decode_if ();
|
interface VX_decode_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire [31:0] next_PC;
|
wire [31:0] next_PC;
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef EXTF_F_ENABLE
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
`endif
|
||||||
|
|
||||||
interface VX_fpu_from_csr_if ();
|
interface VX_fpu_from_csr_if ();
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
|
|
|
@ -3,19 +3,19 @@
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef EXTF_F_ENABLE
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
`endif
|
||||||
|
|
||||||
interface VX_fpu_req_if ();
|
interface VX_fpu_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
|
||||||
|
|
||||||
wire [`FPU_BITS-1:0] fpu_op;
|
wire [`FPU_BITS-1:0] fpu_op;
|
||||||
wire [`FRM_BITS-1:0] frm;
|
wire [`FRM_BITS-1:0] frm;
|
||||||
|
|
||||||
wire wb;
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
wire rd_is_fp;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef EXTF_F_ENABLE
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
`endif
|
||||||
|
|
||||||
interface VX_fpu_to_csr_if ();
|
interface VX_fpu_to_csr_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
|
|
|
@ -5,9 +5,10 @@
|
||||||
|
|
||||||
interface VX_gpu_req_if();
|
interface VX_gpu_req_if();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
|
||||||
|
|
||||||
wire [`GPU_BITS-1:0] gpu_op;
|
wire [`GPU_BITS-1:0] gpu_op;
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
interface VX_ifetch_req_if ();
|
interface VX_ifetch_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
interface VX_ifetch_rsp_if ();
|
interface VX_ifetch_rsp_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire [31:0] instr;
|
wire [31:0] instr;
|
||||||
|
|
|
@ -5,20 +5,22 @@
|
||||||
|
|
||||||
interface VX_lsu_req_if ();
|
interface VX_lsu_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire rw;
|
wire rw;
|
||||||
wire [`BYTEEN_BITS-1:0] byteen;
|
wire [`BYTEEN_BITS-1:0] byteen;
|
||||||
|
|
||||||
wire wb;
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||||
wire [31:0] offset;
|
wire [31:0] offset;
|
||||||
|
|
||||||
|
wire [`NR_BITS-1:0] rd;
|
||||||
|
wire wb;
|
||||||
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
|
@ -3,17 +3,17 @@
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef EXT_M_ENABLE
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
`endif
|
||||||
|
|
||||||
interface VX_mul_req_if ();
|
interface VX_mul_req_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [31:0] curr_PC;
|
|
||||||
|
|
||||||
wire [`MUL_BITS-1:0] mul_op;
|
wire [`MUL_BITS-1:0] mul_op;
|
||||||
|
|
||||||
wire wb;
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
|
|
||||||
|
|
|
@ -12,17 +12,14 @@ interface VX_warp_ctl_if ();
|
||||||
|
|
||||||
wire wspawn;
|
wire wspawn;
|
||||||
wire [31:0] wspawn_pc;
|
wire [31:0] wspawn_pc;
|
||||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||||
|
|
||||||
wire whalt;
|
|
||||||
|
|
||||||
wire is_barrier;
|
wire is_barrier;
|
||||||
wire [`NB_BITS-1:0] barrier_id;
|
wire [`NB_BITS-1:0] barrier_id;
|
||||||
wire [`NW_BITS:0] num_warps;
|
wire [`NW_BITS:0] barrier_num_warps;
|
||||||
|
|
||||||
wire is_split;
|
wire is_split;
|
||||||
wire do_split;
|
wire do_split;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
wire [`NUM_THREADS-1:0] split_new_mask;
|
||||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
wire [`NUM_THREADS-1:0] split_later_mask;
|
||||||
wire [31:0] split_save_pc;
|
wire [31:0] split_save_pc;
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
interface VX_wb_if ();
|
interface VX_wb_if ();
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] valid;
|
wire valid;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] warp_num;
|
||||||
wire [`NR_BITS-1:0] rd;
|
wire [`NR_BITS-1:0] rd;
|
||||||
wire rd_is_fp;
|
wire rd_is_fp;
|
||||||
|
|
74
hw/rtl/libs/VX_cam_buffer.v
Normal file
74
hw/rtl/libs/VX_cam_buffer.v
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
// Slot-addressed buffer with explicit acquire/release of entries.
//
// Parameters:
//   DATAW  - width of one stored entry.
//   SIZE   - number of slots.
//   RPORTS - number of read/release ports.
//   ADDRW  - slot index width (defaults to `LOG2UP(SIZE)).
//
// Ports:
//   write_data   - value stored into slot `write_addr` when `acquire_slot` is set.
//   write_addr   - registered index of the slot the next acquire will use.
//   acquire_slot - marks slot `write_addr` busy and stores `write_data` in it.
//   read_addr    - per-port slot index to read (and optionally release).
//   read_data    - per-port combinational read of the addressed slot.
//   release_slot - per-port: marks slot `read_addr[i]` free again.
//   full         - registered flag, set when the encoder found no free slot
//                  in the next-state free mask.
module VX_cam_buffer #(
    parameter DATAW  = 1,
    parameter SIZE   = 1,
    parameter RPORTS = 1,
    parameter ADDRW  = `LOG2UP(SIZE)
) (
    input wire clk,
    input wire reset,
    input wire [DATAW-1:0]              write_data,
    output wire [ADDRW-1:0]             write_addr,
    input wire                          acquire_slot,
    input wire [RPORTS-1:0][ADDRW-1:0]  read_addr,
    output reg [RPORTS-1:0][DATAW-1:0]  read_data,
    input wire [RPORTS-1:0]             release_slot,
    output wire                         full
);
    reg [DATAW-1:0] entries [SIZE-1:0];
    reg [SIZE-1:0]  free_slots, free_slots_n;
    reg [ADDRW-1:0] write_addr_r;
    reg             full_r;

    wire             free_valid;
    wire [ADDRW-1:0] free_index;

    // Picks a free slot out of the NEXT-state mask, so the index and the
    // full flag registered below already account for this cycle's
    // acquire/release activity.
    VX_priority_encoder #(
        .N(SIZE)
    ) free_slots_encoder (
        .data_in   (free_slots_n),
        .data_out  (free_index),
        .valid_out (free_valid)
    );

    // Next-state free mask: clear the slot being acquired, then set every
    // slot being released.
    always @(*) begin
        free_slots_n = free_slots;
        if (acquire_slot) begin
            free_slots_n[write_addr_r] = 0;
        end
        for (integer i = 0; i < RPORTS; i++) begin
            if (release_slot[i]) begin
                free_slots_n[read_addr[i]] = 1;
            end
        end
    end

    // Combinational read ports. This used to be a procedural continuous
    // assignment (`assign` inside the always block) targeting a select of
    // read_data, which is not a legal target for that statement; a plain
    // blocking assignment expresses the same read.
    always @(*) begin
        for (integer i = 0; i < RPORTS; i++) begin
            read_data[i] = entries[read_addr[i]];
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            free_slots   <= {SIZE{1'b1}};
            full_r       <= 1'b0;
            write_addr_r <= ADDRW'(1'b0);
        end else begin
            if (acquire_slot) begin
                // Acquiring a slot that is not free is a caller bug.
                assert(1 == free_slots[write_addr]);
                entries[write_addr] <= write_data;
            end
            for (integer i = 0; i < RPORTS; i++) begin
                if (release_slot[i]) begin
                    // Releasing a slot that is already free is a caller bug.
                    assert(0 == free_slots[read_addr[i]]);
                end
            end
            free_slots   <= free_slots_n;
            write_addr_r <= free_index;
            full_r       <= ~free_valid;
        end
    end

    assign write_addr = write_addr_r;
    assign full       = full_r;

endmodule
|
|
@ -12,6 +12,7 @@ module VX_divide #(
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
|
input wire clk_en,
|
||||||
input wire [WIDTHN-1:0] numer,
|
input wire [WIDTHN-1:0] numer,
|
||||||
input wire [WIDTHD-1:0] denom,
|
input wire [WIDTHD-1:0] denom,
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ module VX_divide #(
|
||||||
.quotient (quotient_unqual),
|
.quotient (quotient_unqual),
|
||||||
.remain (remainder_unqual),
|
.remain (remainder_unqual),
|
||||||
.aclr (1'b0),
|
.aclr (1'b0),
|
||||||
.clken (1'b1)
|
.clken (clk_en)
|
||||||
);
|
);
|
||||||
|
|
||||||
defparam
|
defparam
|
||||||
|
@ -43,8 +44,8 @@ module VX_divide #(
|
||||||
quartus_div.lpm_hint = "MAXIMIZE_SPEED=6,LPM_REMAINDERPOSITIVE=FALSE",
|
quartus_div.lpm_hint = "MAXIMIZE_SPEED=6,LPM_REMAINDERPOSITIVE=FALSE",
|
||||||
quartus_div.lpm_pipeline = PIPELINE;
|
quartus_div.lpm_pipeline = PIPELINE;
|
||||||
|
|
||||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
assign quotient = quotient_unqual [WIDTHQ-1:0];
|
||||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
assign remainder = remainder_unqual [WIDTHR-1:0];
|
||||||
|
|
||||||
`else
|
`else
|
||||||
|
|
||||||
|
@ -82,8 +83,8 @@ module VX_divide #(
|
||||||
end
|
end
|
||||||
|
|
||||||
if (PIPELINE == 0) begin
|
if (PIPELINE == 0) begin
|
||||||
assign quotient = quotient_unqual[WIDTHQ-1:0];
|
assign quotient = quotient_unqual [WIDTHQ-1:0];
|
||||||
assign remainder = remainder_unqual[WIDTHR-1:0];
|
assign remainder = remainder_unqual [WIDTHR-1:0];
|
||||||
end else begin
|
end else begin
|
||||||
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
||||||
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
||||||
|
@ -95,10 +96,10 @@ module VX_divide #(
|
||||||
quotient_pipe[i] <= 0;
|
quotient_pipe[i] <= 0;
|
||||||
remainder_pipe[i] <= 0;
|
remainder_pipe[i] <= 0;
|
||||||
end
|
end
|
||||||
else begin
|
else if (clk_en) begin
|
||||||
if (i == 0) begin
|
if (i == 0) begin
|
||||||
quotient_pipe[0] <= quotient_unqual;
|
quotient_pipe[i] <= quotient_unqual;
|
||||||
remainder_pipe[0] <= remainder_unqual;
|
remainder_pipe[i] <= remainder_unqual;
|
||||||
end else begin
|
end else begin
|
||||||
quotient_pipe[i] <= quotient_pipe[i-1];
|
quotient_pipe[i] <= quotient_pipe[i-1];
|
||||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
module VX_generic_queue #(
|
module VX_generic_queue #(
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter SIZE = 16,
|
parameter SIZE = 16,
|
||||||
parameter BUFFERED_OUTPUT = 1
|
parameter BUFFERED = 1
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
@ -58,7 +58,7 @@ module VX_generic_queue #(
|
||||||
reg [DATAW-1:0] data [SIZE-1:0];
|
reg [DATAW-1:0] data [SIZE-1:0];
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
if (0 == BUFFERED_OUTPUT) begin
|
if (0 == BUFFERED) begin
|
||||||
|
|
||||||
reg [`LOG2UP(SIZE):0] rd_ptr_r;
|
reg [`LOG2UP(SIZE):0] rd_ptr_r;
|
||||||
reg [`LOG2UP(SIZE):0] wr_ptr_r;
|
reg [`LOG2UP(SIZE):0] wr_ptr_r;
|
||||||
|
|
|
@ -1,75 +0,0 @@
|
||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
// Parameterized multiplier: result = dataa * datab, optionally signed,
// with PIPELINE register stages on the output (0 = purely combinational).
// Under QUARTUS the vendor lpm_mult primitive is instantiated; otherwise a
// behavioral multiply followed by a shift-register pipeline is used.
module VX_mult #(
    parameter WIDTHA   = 1,
    parameter WIDTHB   = 1,
    parameter WIDTHP   = 1,
    parameter SIGNED   = 0,
    parameter PIPELINE = 0
) (
    input wire clk,
    input wire reset,

    input wire [WIDTHA-1:0]  dataa,
    input wire [WIDTHB-1:0]  datab,
    output wire [WIDTHP-1:0] result
);

`ifdef QUARTUS

    lpm_mult quartus_mult (
        .clock  (clk),
        .dataa  (dataa),
        .datab  (datab),
        .result (result),
        .sclr   (reset),
        .aclr   (1'b0),
        .clken  (1'b1),
        .sum    (1'b0)
    );

    // Primitive configuration, one override per statement.
    defparam quartus_mult.lpm_type           = "LPM_MULT";
    defparam quartus_mult.lpm_widtha         = WIDTHA;
    defparam quartus_mult.lpm_widthb         = WIDTHB;
    defparam quartus_mult.lpm_widthp         = WIDTHP;
    defparam quartus_mult.lpm_representation = SIGNED ? "SIGNED" : "UNSIGNED";
    defparam quartus_mult.lpm_pipeline       = PIPELINE;
    defparam quartus_mult.lpm_hint           = "DEDICATED_MULTIPLIER_CIRCUITRY=YES,MAXIMIZE_SPEED=9";

`else

    wire [WIDTHP-1:0] product;

    // Kept as two separate assigns: merging them into one ?: expression
    // would mix signed and unsigned operands and lose the signed multiply.
    if (SIGNED) begin
        assign product = $signed(dataa) * $signed(datab);
    end else begin
        assign product = dataa * datab;
    end

    if (PIPELINE == 0) begin
        assign result = product;
    end else begin
        reg [WIDTHP-1:0] stages [0:PIPELINE-1];
        integer s;

        // Stage 0 samples the product; every later stage samples its
        // predecessor. Synchronous reset clears all stages.
        always @(posedge clk) begin
            for (s = 0; s < PIPELINE; s++) begin
                if (reset) begin
                    stages[s] <= 0;
                end else if (s == 0) begin
                    stages[s] <= product;
                end else begin
                    stages[s] <= stages[s-1];
                end
            end
        end

        assign result = stages[PIPELINE-1];
    end

`endif

endmodule
|
|
|
@ -8,6 +8,7 @@ module VX_priority_encoder #(
|
||||||
output reg valid_out
|
output reg valid_out
|
||||||
);
|
);
|
||||||
integer i;
|
integer i;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
data_out = 0;
|
data_out = 0;
|
||||||
valid_out = 0;
|
valid_out = 0;
|
||||||
|
|
|
@ -15,7 +15,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||||
|
|
||||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate
|
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate
|
||||||
|
|
||||||
|
@ -35,7 +35,6 @@ VF += -cc Vortex.v -top-module Vortex
|
||||||
VF += verilator.vlt
|
VF += verilator.vlt
|
||||||
|
|
||||||
DBG += -DVCD_OUTPUT $(DBG_FLAGS)
|
DBG += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||||
DBG += -DDBG_CORE_REQ_INFO
|
|
||||||
|
|
||||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ double sc_time_stamp() {
|
||||||
|
|
||||||
Simulator::Simulator() {
|
Simulator::Simulator() {
|
||||||
// force random values for uninitialized signals
|
// force random values for uninitialized signals
|
||||||
Verilated::randReset(1);
|
Verilated::randReset(2);
|
||||||
|
|
||||||
// Turn off assertion before reset
|
// Turn off assertion before reset
|
||||||
Verilated::assertOn(false);
|
Verilated::assertOn(false);
|
||||||
|
@ -24,7 +24,8 @@ Simulator::Simulator() {
|
||||||
|
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
Verilated::traceEverOn(true);
|
Verilated::traceEverOn(true);
|
||||||
trace_ = new VerilatedVcdC;
|
trace_ = new VerilatedVcdC();
|
||||||
|
trace_->set_time_unit("1ns");
|
||||||
vortex_->trace(trace_, 99);
|
vortex_->trace(trace_, 99);
|
||||||
trace_->open("trace.vcd");
|
trace_->open("trace.vcd");
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -83,11 +83,11 @@ vx_num_cores:
|
||||||
.type vx_num_cycles, @function
|
.type vx_num_cycles, @function
|
||||||
.global vx_num_cycles
|
.global vx_num_cycles
|
||||||
vx_num_cycles:
|
vx_num_cycles:
|
||||||
csrr a0, CSR_CYCLL
|
csrr a0, CSR_CYCLE_L
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.type vx_num_instrs, @function
|
.type vx_num_instrs, @function
|
||||||
.global vx_num_instrs
|
.global vx_num_instrs
|
||||||
vx_num_instrs:
|
vx_num_instrs:
|
||||||
csrr a0, CSR_INSTL
|
csrr a0, CSR_INSTR_L
|
||||||
ret
|
ret
|
|
@ -49,7 +49,7 @@ vx_set_sp:
|
||||||
slli a1, a1, 10 # multiply by 1024
|
slli a1, a1, 10 # multiply by 1024
|
||||||
csrr a2, CSR_LTID # get local thread id
|
csrr a2, CSR_LTID # get local thread id
|
||||||
slli a2, a2, 2 # multiply by 4
|
slli a2, a2, 2 # multiply by 4
|
||||||
lui sp, STACK_BASE_ADDR # load base sp
|
lui sp, (SHARED_MEM_BASE_ADDR>>12) # load base sp
|
||||||
sub sp, sp, a1 # sub thread block
|
sub sp, sp, a1 # sub thread block
|
||||||
add sp, sp, a2 # reduce addr collision for perf
|
add sp, sp, a2 # reduce addr collision for perf
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
|
||||||
CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld
|
CFLAGS += -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld
|
||||||
CFLAGS += -nostartfiles -ffreestanding -fno-exceptions -Wl,--gc-sections
|
CFLAGS += -nostartfiles -ffreestanding -fno-exceptions -Wl,--gc-sections
|
||||||
CFLAGS += -I$(VORTEX_RT_PATH)/include
|
CFLAGS += -I$(VORTEX_RT_PATH)/include -I../../../hw
|
||||||
|
|
||||||
LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
|
LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include <vx_intrinsics.h>
|
#include <vx_intrinsics.h>
|
||||||
#include <vx_print.h>
|
#include <vx_print.h>
|
||||||
#include <vx_spawn.h>
|
#include <vx_spawn.h>
|
||||||
|
#include <VX_config.h>
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
|
@ -100,7 +101,7 @@ int main()
|
||||||
test_wsapwn();
|
test_wsapwn();
|
||||||
|
|
||||||
vx_print_str("Shared Memory test\n");
|
vx_print_str("Shared Memory test\n");
|
||||||
unsigned * ptr = (unsigned *) 0xFFFF0000;
|
unsigned * ptr = (unsigned *) SHARED_MEM_BASE_ADDR;
|
||||||
unsigned value = 0;
|
unsigned value = 0;
|
||||||
for (int i = 0; i < 5; i++)
|
for (int i = 0; i < 5; i++)
|
||||||
{
|
{
|
||||||
|
@ -112,7 +113,6 @@ int main()
|
||||||
vx_print_str("-------------------\n");
|
vx_print_str("-------------------\n");
|
||||||
value++;
|
value++;
|
||||||
ptr++;
|
ptr++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vx_print_str("vx_spawn_warps mat_add_kernel\n");
|
vx_print_str("vx_spawn_warps mat_add_kernel\n");
|
||||||
|
|
|
@ -126,15 +126,3 @@ void intrinsics_tests()
|
||||||
vx_print_str("test_spawn\n");
|
vx_print_str("test_spawn\n");
|
||||||
test_wsapwn();
|
test_wsapwn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -81,14 +81,14 @@ Disassembly of section .text:
|
||||||
80000114: 3a8000ef jal ra,800004bc <test_wsapwn>
|
80000114: 3a8000ef jal ra,800004bc <test_wsapwn>
|
||||||
80000118: 80001537 lui a0,0x80001
|
80000118: 80001537 lui a0,0x80001
|
||||||
8000011c: ddc50513 addi a0,a0,-548 # 80000ddc <__global_pointer$+0xfffff5d4>
|
8000011c: ddc50513 addi a0,a0,-548 # 80000ddc <__global_pointer$+0xfffff5d4>
|
||||||
80000120: ffff0437 lui s0,0xffff0
|
80000120: 6ffff437 lui s0,0x6ffff
|
||||||
80000124: 628000ef jal ra,8000074c <vx_print_str>
|
80000124: 628000ef jal ra,8000074c <vx_print_str>
|
||||||
80000128: 00000493 li s1,0
|
80000128: 00000493 li s1,0
|
||||||
8000012c: 80001b37 lui s6,0x80001
|
8000012c: 80001b37 lui s6,0x80001
|
||||||
80000130: 80001ab7 lui s5,0x80001
|
80000130: 80001ab7 lui s5,0x80001
|
||||||
80000134: 80001a37 lui s4,0x80001
|
80000134: 80001a37 lui s4,0x80001
|
||||||
80000138: 800019b7 lui s3,0x80001
|
80000138: 800019b7 lui s3,0x80001
|
||||||
8000013c: 01440913 addi s2,s0,20 # ffff0014 <__global_pointer$+0x7ffee80c>
|
8000013c: 01440913 addi s2,s0,20 # 6ffff014 <_start-0x10000fec>
|
||||||
80000140: 00942023 sw s1,0(s0)
|
80000140: 00942023 sw s1,0(s0)
|
||||||
80000144: 00040593 mv a1,s0
|
80000144: 00040593 mv a1,s0
|
||||||
80000148: df0b0513 addi a0,s6,-528 # 80000df0 <__global_pointer$+0xfffff5e8>
|
80000148: df0b0513 addi a0,s6,-528 # 80000df0 <__global_pointer$+0xfffff5e8>
|
||||||
|
|
Binary file not shown.
|
@ -17,7 +17,7 @@
|
||||||
:1000E80037150080130585DBEF00C0651305400058
|
:1000E80037150080130585DBEF00C0651305400058
|
||||||
:1000F800EF00405EEF00402E13051000EF00805D1A
|
:1000F800EF00405EEF00402E13051000EF00805D1A
|
||||||
:10010800371500801305C5DCEF00C063EF00803AA7
|
:10010800371500801305C5DCEF00C063EF00803AA7
|
||||||
:10011800371500801305C5DD3704FFFFEF00806247
|
:10011800371500801305C5DD37F4FF6FEF008062E7
|
||||||
:1001280093040000371B0080B71A0080371A00803C
|
:1001280093040000371B0080B71A0080371A00803C
|
||||||
:10013800B719008013094401232094009305040093
|
:10013800B719008013094401232094009305040093
|
||||||
:1001480013050BDFEF00006E9385040013858ADF2B
|
:1001480013050BDFEF00006E9385040013858ADF2B
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue