This commit is contained in:
Blaise Tine 2020-08-26 08:05:34 -04:00
commit c7cdb09822
9 changed files with 165 additions and 125 deletions

View file

@ -55,8 +55,6 @@
`define EXT_F_ENABLE
`define IBUF_ENABLE
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0

View file

@ -2,52 +2,69 @@
// VX_gpr_bypass: forwards `data_in` to `data_out`, either directly (PASSTHRU) or through a
// small two-slot buffer that holds pushed data until it is popped.
//
// NOTE(review): this text looks like a rendered diff hunk with the +/- markers stripped, so
// lines from the pre-commit and post-commit versions appear interleaved. Several declarations
// and statements therefore occur twice. Which copy is old and which is new is an inference
// flagged below, not something this listing proves; confirm against the repository.
module VX_gpr_bypass #(
// DATAW: bit width of data_in, data_out and the internal buffer registers.
parameter DATAW = 1,
// BUFFERED: not referenced anywhere in the visible body.
// NOTE(review): presumably the pre-commit parameter that PASSTHRU replaces; confirm.
parameter BUFFERED = 1
// PASSTHRU: when non-zero, selects the branch that wires data_out straight to data_in.
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire push,
// NOTE(review): `pop` is declared twice (reg, then wire); presumably the old and new diff lines.
input reg pop,
input wire pop,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out
);
// NOTE(review): these three declarations are repeated inside the `else` branch further down;
// presumably this is the pre-commit copy.
reg [DATAW-1:0] buffer, buffer2;
reg use_buffer, use_buffer2;
reg delayed_push;
// Pass-through variant: only the one-cycle-delayed push is tracked, for the assertion below.
if (PASSTHRU) begin
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
end else begin
delayed_push <= push;
// Checks that a push registered on the previous cycle coincides with a pop.
assert(!delayed_push || pop);
end
end
// NOTE(review): the always block below repeats the buffered logic that appears again in the
// `else` branch and is cut off mid-statement; presumably pre-commit lines left by the
// diff interleaving.
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
use_buffer <= 0;
use_buffer2 <= 0;
end else begin
delayed_push <= push;
assert(!use_buffer2 || use_buffer);
if (pop) begin
if (use_buffer) begin
buffer <= buffer2;
use_buffer <= use_buffer2;
use_buffer2 <= 0;
end
end
if (delayed_push) begin
if (use_buffer) begin
assert(!use_buffer2); // queue full!
if (pop) begin
// Pass-through output: data_out follows data_in combinationally.
assign data_out = data_in;
end else begin
// Buffered variant: `buffer` is the entry presented on data_out, `buffer2` is the second
// slot; use_buffer / use_buffer2 are their valid flags.
reg [DATAW-1:0] buffer, buffer2;
reg use_buffer, use_buffer2;
reg delayed_push;
always @(posedge clk) begin
if (reset) begin
delayed_push <= 0;
use_buffer <= 0;
use_buffer2 <= 0;
end else begin
// push is registered, so data_in is captured one cycle after push was asserted.
delayed_push <= push;
// Invariant: the second slot may only be valid while the first slot is valid.
assert(!use_buffer2 || use_buffer);
// Pop with a valid first slot: shift the second slot (and its valid flag) into the first.
if (pop) begin
if (use_buffer) begin
buffer <= buffer2;
use_buffer <= use_buffer2;
use_buffer2 <= 0;
end
end
// Delayed push: store data_in, except when the first slot is empty and pop is set; in that
// case nothing is stored and data_in reaches data_out directly through the final assign.
if (delayed_push) begin
if (use_buffer) begin
assert(!use_buffer2); // queue full!
if (pop) begin
// First slot is popped this cycle: this later non-blocking assignment overrides the
// shift above, so data_in becomes the new first entry.
buffer <= data_in;
end else begin
buffer2 <= data_in;
use_buffer2 <= 1;
end
// Overrides the use_buffer update made in the pop branch: the first slot stays valid.
use_buffer <= 1;
// NOTE(review): the lines from here to the closing `end`s repeat statements already seen
// above; presumably more old/new diff interleaving rather than intended logic.
end else if (!pop) begin
buffer <= data_in;
end else begin
buffer2 <= data_in;
use_buffer2 <= 1;
end
use_buffer <= 1;
end else if (!pop) begin
buffer <= data_in;
use_buffer <= 1;
use_buffer <= 1;
end
end
end
end
// Buffered output: the first slot when it is valid, otherwise data_in directly.
assign data_out = use_buffer ? buffer : data_in;
end
// NOTE(review): second copy of the same assign outside the if/else; presumably the pre-commit line.
assign data_out = use_buffer ? buffer : data_in;
endmodule

View file

@ -83,12 +83,11 @@ module VX_ibuffer #(
reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
reg deq_valid, deq_valid_n;
reg [DATAW-1:0] deq_instr, deq_instr_n;
reg deq_is_size1, deq_is_size1_n;
always @(*) begin
valid_table_n = valid_table;
if (deq_fire && deq_is_size1) begin
valid_table_n[ibuf_deq_if.wid] = 0;
if (deq_fire) begin
valid_table_n[ibuf_deq_if.wid] = (q_size[deq_wid] != SIZEW'(1));
end
if (enq_fire) begin
valid_table_n[ibuf_enq_if.wid] = 1;
@ -96,32 +95,35 @@ module VX_ibuffer #(
end
always @(*) begin
deq_valid_n = 0;
deq_wid_n = 'x;
deq_instr_n = 'x;
deq_is_size1_n = 'x;
deq_valid_n = 0;
deq_wid_n = 'x;
deq_instr_n = 'x;
schedule_table_n = schedule_table;
if (deq_fire && deq_is_size1) begin
schedule_table_n[ibuf_deq_if.wid] = 0;
end
for (integer i = 0; i < `NUM_WARPS; i++) begin
if (schedule_table_n[i]) begin
deq_valid_n = 1;
deq_wid_n = `NW_BITS'(i);
deq_instr_n = (deq_fire && (ibuf_deq_if.wid == `NW_BITS'(i))) ? q_data_prev[i] : q_data_out[i];
deq_is_size1_n = (~(enq_fire && ibuf_enq_if.wid == `NW_BITS'(i))
&& (((deq_fire && ibuf_deq_if.wid == `NW_BITS'(i)) && (SIZEW'(2) == q_size[i]))
|| (SIZEW'(1) == q_size[i])));
schedule_table_n[i] = 0;
break;
schedule_table_n = schedule_table;
if (0 == num_warps) begin
deq_valid_n = enq_fire;
deq_wid_n = ibuf_enq_if.wid;
deq_instr_n = q_data_in;
end else if ((1 == num_warps) || freeze) begin
deq_valid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) || enq_fire;
deq_wid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) ? deq_wid : ibuf_enq_if.wid;
deq_instr_n = deq_fire ? ((q_size[deq_wid] != SIZEW'(1)) ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid];
end else begin
for (integer i = 0; i < `NUM_WARPS; i++) begin
if (schedule_table_n[i]) begin
deq_valid_n = 1;
deq_wid_n = `NW_BITS'(i);
deq_instr_n = q_data_out[i];
schedule_table_n[i] = 0;
break;
end
end
end
end
end
wire warp_added = enq_fire && (0 == q_size[ibuf_enq_if.wid]) && (!deq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);
wire warp_removed = deq_fire && (1 == q_size[ibuf_deq_if.wid]) && (!enq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);
wire warp_added = enq_fire && (0 == q_size[ibuf_enq_if.wid]);
wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == ibuf_deq_if.wid) && (1 == q_size[ibuf_deq_if.wid]);
always @(posedge clk) begin
if (reset) begin
@ -130,20 +132,18 @@ module VX_ibuffer #(
deq_valid <= 0;
num_warps <= 0;
end else begin
valid_table <= valid_table_n;
schedule_table <= (| schedule_table_n) ? schedule_table_n : valid_table_n;
valid_table <= valid_table_n;
if (enq_fire && (0 == num_warps)) begin
deq_valid <= 1;
deq_wid <= ibuf_enq_if.wid;
deq_instr <= q_data_in;
deq_is_size1 <= 1;
end else if (!freeze) begin
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
deq_is_size1 <= deq_is_size1_n;
end
if ((| schedule_table_n)) begin
schedule_table <= schedule_table_n;
end else begin
schedule_table <= valid_table_n;
schedule_table[deq_wid_n] <= 0;
end
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);
@ -151,14 +151,19 @@ module VX_ibuffer #(
num_warps <= num_warps - NWARPSW'(1);
end
`ifdef VERILATOR
`ifdef VERILATOR
/*if (enq_fire || deq_fire || deq_valid) begin
$display("*** %t: cur=%b(%0d), nxt=%b(%0d), enq=%b(%0d), deq=%b(%0d), nw=%0d(%0d,%0d,%0d,%0d), sched=%b, sched_n=%b",
$time, deq_valid, deq_wid, deq_valid_n, deq_wid_n, enq_fire, ibuf_enq_if.wid, deq_fire, ibuf_deq_if.wid, num_warps, size_r[0], size_r[1], size_r[2], size_r[3], schedule_table, schedule_table_n);
end*/
begin // verify 'num_warps'
integer nw = 0;
for (integer i = 0; i < `NUM_WARPS; i++) begin
nw += 32'(q_size[i] != 0);
end
assert(nw == 32'(num_warps));
assert(~deq_fire || num_warps != 0);
assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
end
`endif
end

View file

@ -48,7 +48,8 @@ module VX_instr_demux (
);
VX_gpr_bypass #(
.DATAW ((2 * `NUM_THREADS * 32))
.DATAW (2 * `NUM_THREADS * 32),
.PASSTHRU (1) // ALU has no back-pressure, bypass not needed
) alu_bypass (
.clk (clk),
.reset (reset),
@ -231,6 +232,6 @@ module VX_instr_demux (
`ifdef EXT_F_ENABLE
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU))
`endif
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
endmodule

View file

@ -5,15 +5,15 @@ module VX_ipdom_stack #(
parameter WIDTH = 1,
parameter DEPTH = 1
) (
input wire clk,
input wire reset,
input reg [WIDTH - 1:0] q1,
input reg [WIDTH - 1:0] q2,
output wire[WIDTH - 1:0] d,
input wire push,
input wire pop,
output wire empty,
output wire full
input wire clk,
input wire reset,
input wire [WIDTH - 1:0] q1,
input wire [WIDTH - 1:0] q2,
output wire [WIDTH - 1:0] d,
input wire push,
input wire pop,
output wire empty,
output wire full
);
localparam STACK_SIZE = 2 ** DEPTH;

View file

@ -55,7 +55,7 @@ module VX_scoreboard #(
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_reg_mask[ibuf_deq_if.rd], inuse_reg_mask[ibuf_deq_if.rs1], inuse_reg_mask[ibuf_deq_if.rs2], inuse_reg_mask[ibuf_deq_if.rs3], exe_delay, gpr_delay);
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end
end
`endif

View file

@ -23,42 +23,46 @@ module VX_writeback #(
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
VX_writeback_if writeback_tmp_if();
assign writeback_tmp_if.valid = alu_valid ? alu_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
assign writeback_tmp_if.wid = alu_valid ? alu_commit_if.wid :
lsu_valid ? lsu_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [`NUM_THREADS-1:0] wb_thread_mask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
assign writeback_tmp_if.thread_mask = alu_valid ? alu_commit_if.thread_mask :
lsu_valid ? lsu_commit_if.thread_mask :
csr_valid ? csr_commit_if.thread_mask :
mul_valid ? mul_commit_if.thread_mask :
fpu_valid ? fpu_commit_if.thread_mask :
0;
assign wb_valid = alu_valid ? alu_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
assign writeback_tmp_if.rd = alu_valid ? alu_commit_if.rd :
lsu_valid ? lsu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
0;
assign wb_wid = alu_valid ? alu_commit_if.wid :
lsu_valid ? lsu_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
assign wb_thread_mask = alu_valid ? alu_commit_if.thread_mask :
lsu_valid ? lsu_commit_if.thread_mask :
csr_valid ? csr_commit_if.thread_mask :
mul_valid ? mul_commit_if.thread_mask :
fpu_valid ? fpu_commit_if.thread_mask :
0;
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
lsu_valid ? lsu_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
0;
assign wb_rd = alu_valid ? alu_commit_if.rd :
lsu_valid ? lsu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
0;
assign wb_data = alu_valid ? alu_commit_if.data :
lsu_valid ? lsu_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
0;
wire stall = ~writeback_if.ready && writeback_if.valid;
@ -69,8 +73,8 @@ module VX_writeback #(
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.wid, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
.in ({wb_valid, wb_wid, wb_thread_mask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
);
assign alu_commit_if.ready = !stall;

View file

@ -12,6 +12,7 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for uninitialized signals
Verilated::randReset(2);
Verilated::randSeed(50);
// Turn off assertion before reset
Verilated::assertOn(false);

View file

@ -38,6 +38,20 @@ set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name POWER_USE_TA_VALUE 65
set_global_assignment -name SEED 1
set idx 0
foreach arg $q_args_orig {