quartus build fixes

This commit is contained in:
Blaise Tine 2020-08-23 22:04:46 -07:00
parent 1c9445745f
commit f292e5003d
27 changed files with 241 additions and 206 deletions

View file

@@ -4,4 +4,5 @@ set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818

View file

@@ -82,7 +82,7 @@ module VX_alu_unit #(
end
end
reg [31:0] next_PC = alu_req_if.curr_PC + 4;
wire [31:0] next_PC = alu_req_if.curr_PC + 4;
VX_shift_register #(
.DATAW(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `NT_BITS + 1 + 1 + `BR_BITS + 2 + 32),
@@ -131,7 +131,7 @@ module VX_alu_unit #(
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (0),
.flush (1'b0),
.in ({valid_r, wid_r, thread_mask_r, curr_PC_r, rd_r, wb_r, alu_jal_result, is_br_op_r, br_taken, br_dest}),
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.thread_mask, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_s, branch_ctl_if.taken, branch_ctl_if.dest})
);

View file

@@ -28,7 +28,7 @@ module VX_commit #(
fpu_commit_if.valid,
gpu_commit_if.valid};
wire [`NE_BITS:0] num_commits;
wire [$clog2(`NUM_EXS+1)-1:0] num_commits;
VX_countones #(
.N(`NUM_EXS)
@@ -54,7 +54,7 @@ module VX_commit #(
fflags_t fflags_r;
reg has_fflags_r;
reg [`NW_BITS-1:0] wid_r;
reg [`NE_BITS:0] num_commits_r;
reg [$clog2(`NUM_EXS+1)-1:0] num_commits_r;
reg csr_update_r;
always @(posedge clk) begin

View file

@@ -13,13 +13,12 @@ module VX_csr_data #(
input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
output reg[31:0] read_data,
output wire[31:0] read_data,
input wire write_enable,
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`CSR_WIDTH-1:0] write_data
);
reg [`CSR_WIDTH-1:0] csr_satp;
reg [`CSR_WIDTH-1:0] csr_mstatus;
reg [`CSR_WIDTH-1:0] csr_medeleg;
@@ -36,6 +35,8 @@ module VX_csr_data #(
reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0];
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
reg [31:0] read_data_r;
always @(posedge clk) begin
if (cmt_to_csr_if.has_fflags) begin
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags;
@@ -90,50 +91,52 @@ module VX_csr_data #(
end
always @(*) begin
read_data_r = 'x;
case (read_addr)
`CSR_FFLAGS : read_data = 32'(csr_fflags[wid]);
`CSR_FRM : read_data = 32'(csr_frm[wid]);
`CSR_FCSR : read_data = 32'(csr_fcsr[wid]);
`CSR_FFLAGS : read_data_r = 32'(csr_fflags[wid]);
`CSR_FRM : read_data_r = 32'(csr_frm[wid]);
`CSR_FCSR : read_data_r = 32'(csr_fcsr[wid]);
`CSR_LWID : read_data = 32'(wid);
`CSR_LWID : read_data_r = 32'(wid);
`CSR_LTID ,
`CSR_GTID ,
`CSR_MHARTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(wid);
`CSR_GCID : read_data = CORE_ID;
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(wid);
`CSR_GCID : read_data_r = CORE_ID;
`CSR_NT : read_data_r = `NUM_THREADS;
`CSR_NW : read_data_r = `NUM_WARPS;
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
`CSR_SATP : read_data = 32'(csr_satp);
`CSR_SATP : read_data_r = 32'(csr_satp);
`CSR_MSTATUS : read_data = 32'(csr_mstatus);
`CSR_MISA : read_data = `ISA_CODE;
`CSR_MEDELEG : read_data = 32'(csr_medeleg);
`CSR_MIDELEG : read_data = 32'(csr_mideleg);
`CSR_MIE : read_data = 32'(csr_mie);
`CSR_MTVEC : read_data = 32'(csr_mtvec);
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
`CSR_MISA : read_data_r = `ISA_CODE;
`CSR_MEDELEG : read_data_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_r = 32'(csr_mideleg);
`CSR_MIE : read_data_r = 32'(csr_mie);
`CSR_MTVEC : read_data_r = 32'(csr_mtvec);
`CSR_MEPC : read_data = 32'(csr_mepc);
`CSR_MEPC : read_data_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0: read_data = 32'(csr_pmpaddr[0]);
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0: read_data_r = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data = csr_cycle[31:0];
`CSR_CYCLE_H : read_data = csr_cycle[63:32];
`CSR_INSTRET : read_data = csr_instret[31:0];
`CSR_INSTRET_H:read_data = csr_instret[63:32];
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
`CSR_INSTRET : read_data_r = csr_instret[31:0];
`CSR_INSTRET_H:read_data_r = csr_instret[63:32];
`CSR_MVENDORID:read_data = `VENDOR_ID;
`CSR_MARCHID : read_data = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data = `IMPLEMENTATION_ID;
`CSR_MVENDORID:read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin
default: begin
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end
endcase
end
assign read_data = read_data_r;
assign csr_to_issue_if.frm = csr_frm[csr_to_issue_if.wid];
endmodule

View file

@@ -94,7 +94,7 @@ module VX_csr_unit #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.wid, csr_pipe_req_if.thread_mask, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.thread_mask, csr_pipe_rsp_if.curr_PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
);

View file

@@ -14,13 +14,10 @@ module VX_gpr_ram (
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
initial begin
// initialize ram
initial begin // initialize ram: set r0 = 0
for (integer j = 0; j < `NUM_WARPS; j++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
if (i == 0) begin
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'h00000000}}; // set r0 = 0
end
ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}};
end
end
end

View file

@@ -76,7 +76,7 @@ module VX_gpu_unit #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.thread_mask, gpu_commit_if.curr_PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
);

View file

@@ -7,22 +7,25 @@ module VX_ibuffer #(
input wire reset,
// inputs
input wire freeze, // do not switch to another warp
input wire freeze, // keep current warp
VX_decode_if ibuf_enq_if,
// outputs
VX_decode_if ibuf_deq_if
);
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
localparam SIZE = `IBUF_SIZE;
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
localparam SIZE = `IBUF_SIZE;
localparam SIZEW = $clog2(SIZE+1);
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
`USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
reg [`LOG2UP(SIZE+1)-1:0] size_r [`NUM_WARPS-1:0];
reg [`LOG2UP(SIZE):0] rd_ptr_r [`NUM_WARPS-1:0];
reg [`LOG2UP(SIZE):0] wr_ptr_r [`NUM_WARPS-1:0];
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0];
reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0];
wire [`NUM_WARPS-1:0] q_full;
wire [`NUM_WARPS-1:0][`LOG2UP(SIZE+1)-1:0] q_size;
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
wire [DATAW-1:0] q_data_in;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
@@ -35,8 +38,8 @@ module VX_ibuffer #(
wire writing = enq_fire && (i == ibuf_enq_if.wid);
wire reading = deq_fire && (i == ibuf_deq_if.wid);
wire [`LOG2UP(SIZE-1)-1:0] rd_ptr_a = rd_ptr_r[i][`LOG2UP(SIZE-1)-1:0];
wire [`LOG2UP(SIZE-1)-1:0] wr_ptr_a = wr_ptr_r[i][`LOG2UP(SIZE-1)-1:0];
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0];
always @(posedge clk) begin
if (reset) begin
@@ -49,19 +52,19 @@ module VX_ibuffer #(
q_data_out[i] <= q_data_in;
end else begin
entries[i][wr_ptr_a] <= q_data_in;
wr_ptr_r[i] <= wr_ptr_r[i] + 1;
wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1);
end
if (!reading) begin
size_r[i] <= size_r[i] + 1;
size_r[i] <= size_r[i] + SIZEW'(1);
end
end
if (reading) begin
if (size_r[i] != 1) begin
q_data_out[i] <= q_data_prev[i];
rd_ptr_r[i] <= rd_ptr_r[i] + 1;
rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1);
end
if (!writing) begin
size_r[i] <= size_r[i] - 1;
size_r[i] <= size_r[i] - SIZEW'(1);
end
end
end
@@ -75,8 +78,8 @@ module VX_ibuffer #(
///////////////////////////////////////////////////////////////////////////
reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
reg [`NUM_WARPS-1:0] ready_table, ready_table_n;
reg [`LOG2UP(`NUM_WARPS+1)-1:0] active_warps;
reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n;
reg [NWARPSW-1:0] num_warps;
reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
reg deq_valid, deq_valid_n;
reg [DATAW-1:0] deq_instr, deq_instr_n;
@@ -92,18 +95,19 @@ module VX_ibuffer #(
end
always @(*) begin
deq_wid_n = 0;
deq_valid_n = 0;
ready_table_n = ready_table;
deq_wid_n = 0;
deq_valid_n = 0;
deq_instr_n = 'x;
schedule_table_n = schedule_table;
if (deq_fire) begin
ready_table_n[ibuf_deq_if.wid] = (q_size[ibuf_deq_if.wid] != 1);
schedule_table_n[ibuf_deq_if.wid] = (q_size[ibuf_deq_if.wid] != 1);
end
for (integer i = 0; i < `NUM_WARPS; i++) begin
if (ready_table_n[i]) begin
if (schedule_table_n[i]) begin
deq_wid_n = `NW_BITS'(i);
deq_valid_n = 1;
deq_instr_n = (deq_fire && (ibuf_deq_if.wid == `NW_BITS'(i))) ? q_data_prev[i] : q_data_out[i];
ready_table_n[i] = 0;
schedule_table_n[i] = 0;
break;
end
end
@@ -114,15 +118,15 @@ module VX_ibuffer #(
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
ready_table <= 0;
deq_valid <= 0;
active_warps <= 0;
valid_table <= 0;
schedule_table <= 0;
deq_valid <= 0;
num_warps <= 0;
end else begin
valid_table <= valid_table_n;
ready_table <= (| ready_table_n) ? ready_table_n : valid_table_n;
valid_table <= valid_table_n;
schedule_table <= (| schedule_table_n) ? schedule_table_n : valid_table_n;
if (enq_fire && (0 == active_warps)) begin
if (enq_fire && (0 == num_warps)) begin
deq_valid <= 1;
deq_wid <= ibuf_enq_if.wid;
deq_instr <= q_data_in;
@@ -133,19 +137,21 @@ module VX_ibuffer #(
end
if (warp_added && !warp_removed) begin
active_warps <= active_warps + 1;
num_warps <= num_warps + NWARPSW'(1);
end else if (warp_removed && !warp_added) begin
active_warps <= active_warps - 1;
num_warps <= num_warps - NWARPSW'(1);
end
begin
integer k = 0;
`ifdef VERILATOR
begin // verify 'num_warps'
integer nw = 0;
for (integer i = 0; i < `NUM_WARPS; i++) begin
k += 32'(q_size[i] != 0);
nw += 32'(q_size[i] != 0);
end
assert(k == 32'(active_warps));
assert(~deq_fire || active_warps != 0);
assert(nw == 32'(num_warps));
assert(~deq_fire || num_warps != 0);
end
`endif
end
end

View file

@@ -32,7 +32,7 @@ module VX_ipdom_stack #(
stack_2[wr_ptr] <= q2;
is_part[wr_ptr] <= 0;
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + 1;
wr_ptr <= wr_ptr + DEPTH'(1);
end else if (pop) begin
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);

View file

@@ -78,7 +78,7 @@ module VX_lsu_unit #(
.clk (clk),
.reset (reset),
.stall (stall_in),
.flush (0),
.flush (1'b0),
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
.out ({valid_in, req_wid, req_thread_mask, req_curr_PC, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
);
@@ -91,21 +91,21 @@ module VX_lsu_unit #(
wire [1:0] rsp_sext;
reg [`NUM_THREADS-1:0][31:0] rsp_data;
reg [`NUM_THREADS-1:0] mem_rsp_mask[`LSUQ_SIZE-1:0];
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag;
wire lsuq_full;
wire lsuq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == req_rw); // only loads
&& (0 == req_rw); // loads only
wire lsuq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
assign rsp_tag = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid;
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid;
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_upd);
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_n);
VX_cam_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
@@ -128,10 +128,11 @@ module VX_lsu_unit #(
mem_rsp_mask[req_tag] <= req_thread_mask;
end
if (lsuq_pop_part) begin
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_upd;
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n;
end
end
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
wire store_stall = valid_in && req_rw && stall_out;
// Core Request
@@ -167,7 +168,6 @@ module VX_lsu_unit #(
wire is_store_req = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
wire is_load_rsp = (| dcache_rsp_if.valid);
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
wire arb_valid = is_store_req || is_load_rsp;

View file

@@ -111,18 +111,16 @@ module VX_mul_unit #(
// handle divide by zero
always @(*) begin
if (~stall_div) begin
is_div_qual[i] = is_div;
div_in1_qual = alu_in1[i];
div_in2_qual = alu_in2[i];
if (0 == alu_in2[i]) begin
div_in2_qual = 1;
if (is_div) begin
div_in1_qual = 32'hFFFFFFFF; // quotient = (0xFFFFFFFF / 1)
end else begin
is_div_qual[i] = 1; // remainder = (in1 / 1)
end
end
is_div_qual[i] = is_div;
div_in1_qual = alu_in1[i];
div_in2_qual = alu_in2[i];
if (0 == alu_in2[i]) begin
div_in2_qual = 1;
if (is_div) begin
div_in1_qual = 32'hFFFFFFFF; // quotient = (0xFFFFFFFF / 1)
end else begin
is_div_qual[i] = 1; // remainder = (in1 / 1)
end
end
end
@@ -192,7 +190,7 @@ module VX_mul_unit #(
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (0),
.flush (1'b0),
.in ({valid_out, rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb, result}),
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.thread_mask, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
);

View file

@@ -20,9 +20,9 @@ module VX_warp_sched #(
wire [31:0] join_pc;
wire [`NUM_THREADS-1:0] join_tm;
reg [`NUM_WARPS-1:0] warp_active; // real active warps (updated when a warp is activated or disabled)
reg [`NUM_WARPS-1:0] warp_stalled; // asserted when a branch/gpgpu instructions are issued
reg [`NUM_WARPS-1:0] warp_ready, warp_ready_n; // enforces round-robin, barrier, and non-speculating branches
reg [`NUM_WARPS-1:0] active_warps; // real active warps (updated when a warp is activated or disabled)
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n; // enforces round-robin, barrier, and non-speculating branches
// Lock warp until instruction decode to resolve branches
reg [`NUM_WARPS-1:0] fetch_lock;
@@ -47,17 +47,17 @@ module VX_warp_sched #(
reg didnt_split;
always @(*) begin
warp_ready_n = warp_ready;
schedule_table_n = schedule_table;
if (warp_ctl_if.valid
&& warp_ctl_if.tmc.valid
&& (0 == warp_ctl_if.tmc.thread_mask)) begin
warp_ready_n[warp_ctl_if.wid] = 0;
schedule_table_n[warp_ctl_if.wid] = 0;
end
if (wstall_if.wstall) begin
warp_ready_n[wstall_if.wid] = 0;
schedule_table_n[wstall_if.wid] = 0;
end
if (scheduled_warp) begin
warp_ready_n[warp_to_schedule] = 0;
schedule_table_n[warp_to_schedule] = 0;
end
end
@@ -67,31 +67,31 @@ module VX_warp_sched #(
barrier_stall_mask[i] <= 0;
end
use_wspawn_pc <= 0;
use_wspawn <= 0;
warp_pcs[0] <= `STARTUP_ADDR;
warp_active[0] <= 1; // Activating first warp
warp_ready[0] <= 1; // set first warp as ready
thread_masks[0] <= 1; // Activating first thread in first warp
warp_stalled <= 0;
didnt_split <= 0;
fetch_lock <= 0;
use_wspawn_pc <= 0;
use_wspawn <= 0;
warp_pcs[0] <= `STARTUP_ADDR;
active_warps[0] <= 1; // Activating first warp
schedule_table[0] <= 1; // set first warp as ready
thread_masks[0] <= 1; // Activating first thread in first warp
stalled_warps <= 0;
didnt_split <= 0;
fetch_lock <= 0;
for (integer i = 1; i < `NUM_WARPS; i++) begin
warp_pcs[i] <= 0;
warp_active[i] <= 0;
warp_ready[i] <= 0;
thread_masks[i] <= 0;
warp_pcs[i] <= 0;
active_warps[i] <= 0;
schedule_table[i] <= 0;
thread_masks[i] <= 0;
end
end else begin
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
warp_active <= warp_ctl_if.wspawn.wmask;
active_warps <= warp_ctl_if.wspawn.wmask;
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
end
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
warp_stalled[warp_ctl_if.wid] <= 0;
stalled_warps[warp_ctl_if.wid] <= 0;
if (reached_barrier_limit) begin
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
end else begin
@@ -99,9 +99,9 @@ module VX_warp_sched #(
end
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.thread_mask;
warp_stalled[warp_ctl_if.wid] <= 0;
stalled_warps[warp_ctl_if.wid] <= 0;
if (0 == warp_ctl_if.tmc.thread_mask) begin
warp_active[warp_ctl_if.wid] <= 0;
active_warps[warp_ctl_if.wid] <= 0;
end
end else if (join_if.is_join && !didnt_split) begin
if (!join_fall) begin
@@ -110,7 +110,7 @@ module VX_warp_sched #(
thread_masks[join_if.wid] <= join_tm;
didnt_split <= 0;
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
warp_stalled[warp_ctl_if.wid] <= 0;
stalled_warps[warp_ctl_if.wid] <= 0;
if (warp_ctl_if.split.diverged) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
didnt_split <= 0;
@@ -126,12 +126,12 @@ module VX_warp_sched #(
// Stalling the scheduling of warps
if (wstall_if.wstall) begin
warp_stalled[wstall_if.wid] <= 1;
stalled_warps[wstall_if.wid] <= 1;
end
// update 'warp_ready' when a warp is scheduled (update round-robin warp schedule)
// update 'schedule_table' when a warp is scheduled (update round-robin warp schedule)
if (scheduled_warp) begin
warp_pcs[warp_to_schedule] <= warp_pc + 4;
warp_pcs[warp_to_schedule] <= warp_pc + 4;
end
// Branch
@@ -139,7 +139,7 @@ module VX_warp_sched #(
if (branch_ctl_if.taken) begin
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
end
warp_stalled[branch_ctl_if.wid] <= 0;
stalled_warps[branch_ctl_if.wid] <= 0;
end
// Lock warp until instruction decode to resolve branches
@@ -150,8 +150,8 @@ module VX_warp_sched #(
fetch_lock[ifetch_rsp_if.wid] <= 0;
end
// reset 'warp_ready' when it goes to zero (reset round-robin warp schedule)
warp_ready <= (| warp_ready_n) ? warp_ready_n : (warp_active & ~total_warp_stalled);
// reset 'schedule_table' when it goes to zero
schedule_table <= (| schedule_table_n) ? schedule_table_n : (active_warps & ~total_warp_stalled);
end
end
@@ -167,7 +167,7 @@ module VX_warp_sched #(
.count (active_barrier_count)
);
wire reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
reg [`NUM_WARPS-1:0] total_barrier_stall;
always @(*) begin
@@ -214,9 +214,9 @@ module VX_warp_sched #(
wire schedule;
assign total_warp_stalled = warp_stalled | total_barrier_stall | fetch_lock;
assign total_warp_stalled = stalled_warps | total_barrier_stall | fetch_lock;
wire [`NUM_WARPS-1:0] use_ready = warp_ready & ~total_warp_stalled;
wire [`NUM_WARPS-1:0] use_ready = schedule_table & ~total_warp_stalled;
VX_fixed_arbiter #(
.N(`NUM_WARPS)
@@ -251,11 +251,11 @@ module VX_warp_sched #(
.clk (clk),
.reset (reset),
.stall (stall_out),
.flush (0),
.flush (1'b0),
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.wid})
);
assign busy = (warp_active != 0);
assign busy = (active_warps != 0);
endmodule

View file

@@ -68,7 +68,7 @@ module VX_writeback #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.wid, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
);

View file

@@ -14,29 +14,33 @@ module VX_cache_core_req_bank_sel #(
`IGNORE_WARNINGS_BEGIN
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
`IGNORE_WARNINGS_END
input wire [NUM_BANKS-1:0] per_bank_ready,
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
input wire [NUM_BANKS-1:0] per_bank_ready,
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready
);
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
if (NUM_BANKS == 1) begin
always @(*) begin
per_bank_valid = 0;
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[0][i] = core_req_valid[i];
per_bank_valid_r[0][i] = core_req_valid[i];
end
end
assign core_req_ready = per_bank_ready;
end else begin
reg [NUM_BANKS-1:0] per_bank_ready_sel;
always @(*) begin
per_bank_valid = 0;
per_bank_valid_r = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
end
end
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
end
assign per_bank_valid = per_bank_valid_r;
endmodule

View file

@@ -38,7 +38,7 @@ module VX_fp_fpga #(
wire [NUM_FPC-1:0] per_core_valid_out;
wire fpnew_has_fflags;
fflags_t fpnew_fflags;
fflags_t [`NUM_THREADS-1:0] fpnew_fflags;
reg [FPC_BITS-1:0] core_select;
reg fmadd_negate;

View file

@@ -71,15 +71,15 @@ module VX_fp_noncomp #(
fp_type_t tmp_a_type, tmp_b_type;
VX_fp_type fp_type_a (
.exponent(tmp_a_exponent[i]),
.mantissa(tmp_a_mantissa[i]),
.o_type(tmp_a_type[i])
.exponent(tmp_a_exponent),
.mantissa(tmp_a_mantissa),
.o_type(tmp_a_type)
);
VX_fp_type fp_type_b (
.exponent(tmp_b_exponent[i]),
.mantissa(tmp_b_mantissa[i]),
.o_type(tmp_b_type[i])
.exponent(tmp_b_exponent),
.mantissa(tmp_b_mantissa),
.o_type(tmp_b_type)
);
wire tmp_a_smaller = (dataa[i] < datab[i]) ^ (tmp_a_sign || tmp_b_sign);

View file

@@ -131,7 +131,7 @@ module VX_fp_madd #(
.clk(clk),
.reset(reset),
.enable(enable1),
.in({in_tag_st0, in_valid_st0}),
.in({out_tag_st0, in_valid_st0}),
.out({out_tag_st1, out_valid_st1})
);

View file

@@ -131,7 +131,7 @@ module VX_fp_msub #(
.clk(clk),
.reset(reset),
.enable(enable1),
.in({in_tag_st0, in_valid_st0}),
.in({out_tag_st0, in_valid_st0}),
.out({out_tag_st1, out_valid_st1})
);

View file

@@ -9,7 +9,7 @@ interface VX_cmt_to_csr_if ();
wire [`NW_BITS-1:0] wid;
wire [`NE_BITS:0] num_commits;
wire [$clog2(`NUM_EXS+1)-1:0] num_commits;
wire has_fflags;
fflags_t fflags;

View file

@@ -13,7 +13,7 @@ module VX_cam_buffer #(
input wire [DATAW-1:0] write_data,
input wire acquire_slot,
input wire [RPORTS-1:0][ADDRW-1:0] read_addr,
output reg [RPORTS-1:0][DATAW-1:0] read_data,
output wire [RPORTS-1:0][DATAW-1:0] read_data,
input wire [CPORTS-1:0][ADDRW-1:0] release_addr,
input wire [CPORTS-1:0] release_slot,
output wire full

View file

@@ -2,18 +2,23 @@
`include "VX_platform.vh"
module VX_countones #(
parameter N = 10
parameter N = 10,
parameter N_BITS = $clog2(N+1)
) (
input wire [N-1:0] valids,
output reg [$clog2(N):0] count
input wire [N-1:0] valids,
output wire [N_BITS-1:0] count
);
reg [N_BITS-1:0] count_r;
always @(*) begin
count = 0;
count_r = 0;
for (integer i = N-1; i >= 0; i = i - 1) begin
if (valids[i]) begin
count = count + 1;
count_r = count_r + N_BITS'(1);
end
end
end
assign count = count_r;
endmodule

View file

@@ -3,7 +3,9 @@
module VX_generic_queue #(
parameter DATAW = 1,
parameter SIZE = 2,
parameter BUFFERED = 1
parameter BUFFERED = 1,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1)
) (
input wire clk,
input wire reset,
@@ -13,13 +15,13 @@ module VX_generic_queue #(
output wire [DATAW-1:0] data_out,
output wire empty,
output wire full,
output wire [`LOG2UP(SIZE+1)-1:0] size
output wire [SIZEW-1:0] size
);
`STATIC_ASSERT(`ISPOW2(SIZE), "must be 0 or power of 2!")
reg [`LOG2UP(SIZE+1)-1:0] size_r;
wire reading;
wire writing;
reg [SIZEW-1:0] size_r;
wire reading;
wire writing;
assign reading = pop && !empty;
assign writing = push && !full;
@@ -55,11 +57,11 @@ module VX_generic_queue #(
if (0 == BUFFERED) begin
reg [`LOG2UP(SIZE):0] rd_ptr_r;
reg [`LOG2UP(SIZE):0] wr_ptr_r;
reg [ADDRW:0] rd_ptr_r;
reg [ADDRW:0] wr_ptr_r;
wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0];
wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0];
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
always @(posedge clk) begin
if (reset) begin
@@ -86,19 +88,19 @@ module VX_generic_queue #(
assign data_out = data[rd_ptr_a];
assign empty = (wr_ptr_r == rd_ptr_r);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[`LOG2UP(SIZE)] != rd_ptr_r[`LOG2UP(SIZE)]);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
assign size = size_r;
end else begin
reg [DATAW-1:0] head_r;
reg [DATAW-1:0] curr_r;
reg [`LOG2UP(SIZE)-1:0] wr_ptr_r;
reg [`LOG2UP(SIZE)-1:0] rd_ptr_r;
reg [`LOG2UP(SIZE)-1:0] rd_ptr_next_r;
reg empty_r;
reg full_r;
reg bypass_r;
reg [DATAW-1:0] head_r;
reg [DATAW-1:0] curr_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_next_r;
reg empty_r;
reg full_r;
reg bypass_r;
always @(posedge clk) begin
if (reset) begin

View file

@@ -4,19 +4,25 @@ module VX_onehot_encoder #(
parameter N = 6
) (
input wire [N-1:0] onehot,
output reg [`LOG2UP(N)-1:0] binary,
output reg valid
output wire [`LOG2UP(N)-1:0] binary,
output wire valid
);
always @(*) begin
valid = 1'b0;
binary = `LOG2UP(N)'(0);
reg [`LOG2UP(N)-1:0] binary_r;
reg valid_r;
always @(*) begin
binary_r = `LOG2UP(N)'(0);
valid_r = 1'b0;
for (integer i = 0; i < N; i++) begin
if (onehot[i]) begin
valid = 1'b1;
binary = `LOG2UP(N)'(i);
if (onehot[i]) begin
binary_r = `LOG2UP(N)'(i);
valid_r = 1'b1;
end
end
end
assign binary = binary_r;
assign valid = valid_r;
endmodule

View file

@@ -3,19 +3,26 @@
module VX_priority_encoder #(
parameter N = 1
) (
input wire [N-1:0] data_in,
output reg [`LOG2UP(N)-1:0] data_out,
output reg valid_out
input wire [N-1:0] data_in,
output wire [`LOG2UP(N)-1:0] data_out,
output wire valid_out
);
reg [`LOG2UP(N)-1:0] data_out_r;
reg valid_out_r;
always @(*) begin
data_out = 0;
valid_out = 0;
for (integer i = N-1; i >= 0; i = i - 1) begin
data_out_r = 0;
valid_out_r = 0;
for (integer i = 0; i < N; i++) begin
if (data_in[i]) begin
data_out = `LOG2UP(N)'(i);
valid_out = 1;
data_out_r = `LOG2UP(N)'(i);
valid_out_r = 1;
break;
end
end
end
assign data_out = data_out_r;
assign valid_out = valid_out_r;
endmodule

View file

@@ -14,7 +14,7 @@ module VX_scope #(
input wire changed,
input wire [DATAW-1:0] data_in,
input wire [BUSW-1:0] bus_in,
output reg [BUSW-1:0] bus_out,
output wire [BUSW-1:0] bus_out,
input wire bus_write,
input wire bus_read
);
@@ -39,6 +39,7 @@ module VX_scope #(
reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;
reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end;
@@ -168,14 +169,16 @@ module VX_scope #(
always @(*) begin
case (out_cmd)
GET_VALID : bus_out = BUSW'(data_valid);
GET_WIDTH : bus_out = BUSW'(DATAW);
GET_COUNT : bus_out = BUSW'(waddr) + BUSW'(1);
GET_DATA : bus_out = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
default : bus_out = 0;
GET_VALID : bus_out_r = BUSW'(data_valid);
GET_WIDTH : bus_out_r = BUSW'(DATAW);
GET_COUNT : bus_out_r = BUSW'(waddr) + BUSW'(1);
GET_DATA : bus_out_r = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
default : bus_out_r = 0;
endcase
end
assign bus_out = bus_out_r;
`ifdef DBG_PRINT_SCOPE
always @(posedge clk) begin
if (bus_read) begin

View file

@@ -6,21 +6,21 @@ module VX_skid_buffer #(
input wire clk,
input wire reset,
input wire valid_in,
output reg ready_in,
output wire ready_in,
input wire [DATAW-1:0] data_in,
output reg [DATAW-1:0] data_out,
output wire [DATAW-1:0] data_out,
input wire ready_out,
output reg valid_out
output wire valid_out
);
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
reg valid_out_r;
reg use_buffer;
always @(posedge clk) begin
if (reset) begin
use_buffer <= 0;
valid_out <= 0;
data_out <= 0;
buffer <= 0;
use_buffer <= 0;
valid_out_r <= 0;
end else begin
if (valid_in && ready_in && valid_out && !ready_out) begin
assert(!use_buffer);
@@ -33,12 +33,14 @@ module VX_skid_buffer #(
buffer <= data_in;
end
if (!valid_out || ready_out) begin
valid_out <= valid_in || use_buffer;
data_out <= use_buffer ? buffer : data_in;
valid_out_r <= valid_in || use_buffer;
data_out_r <= use_buffer ? buffer : data_in;
end
end
end
assign ready_in = !use_buffer;
assign ready_in = !use_buffer;
assign valid_out = valid_out_r;
assign data_out = data_out_r;
endmodule

View file

@@ -37,6 +37,7 @@ set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set idx 0
foreach arg $q_args_orig {