mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
c7cdb09822
9 changed files with 165 additions and 125 deletions
|
@ -55,8 +55,6 @@
|
|||
|
||||
`define EXT_F_ENABLE
|
||||
|
||||
`define IBUF_ENABLE
|
||||
|
||||
// Device identification
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
|
|
|
@ -2,52 +2,69 @@
|
|||
|
||||
module VX_gpr_bypass #(
|
||||
parameter DATAW = 1,
|
||||
parameter BUFFERED = 1
|
||||
parameter PASSTHRU = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input reg pop,
|
||||
input wire pop,
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [DATAW-1:0] data_out
|
||||
);
|
||||
reg [DATAW-1:0] buffer, buffer2;
|
||||
reg use_buffer, use_buffer2;
|
||||
reg delayed_push;
|
||||
if (PASSTHRU) begin
|
||||
reg delayed_push;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
delayed_push <= 0;
|
||||
end else begin
|
||||
delayed_push <= push;
|
||||
assert(!delayed_push || pop);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
delayed_push <= 0;
|
||||
use_buffer <= 0;
|
||||
use_buffer2 <= 0;
|
||||
end else begin
|
||||
delayed_push <= push;
|
||||
assert(!use_buffer2 || use_buffer);
|
||||
if (pop) begin
|
||||
if (use_buffer) begin
|
||||
buffer <= buffer2;
|
||||
use_buffer <= use_buffer2;
|
||||
use_buffer2 <= 0;
|
||||
end
|
||||
end
|
||||
if (delayed_push) begin
|
||||
if (use_buffer) begin
|
||||
assert(!use_buffer2); // queue full!
|
||||
if (pop) begin
|
||||
assign data_out = data_in;
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] buffer, buffer2;
|
||||
reg use_buffer, use_buffer2;
|
||||
reg delayed_push;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
delayed_push <= 0;
|
||||
use_buffer <= 0;
|
||||
use_buffer2 <= 0;
|
||||
end else begin
|
||||
delayed_push <= push;
|
||||
assert(!use_buffer2 || use_buffer);
|
||||
if (pop) begin
|
||||
if (use_buffer) begin
|
||||
buffer <= buffer2;
|
||||
use_buffer <= use_buffer2;
|
||||
use_buffer2 <= 0;
|
||||
end
|
||||
end
|
||||
if (delayed_push) begin
|
||||
if (use_buffer) begin
|
||||
assert(!use_buffer2); // queue full!
|
||||
if (pop) begin
|
||||
buffer <= data_in;
|
||||
end else begin
|
||||
buffer2 <= data_in;
|
||||
use_buffer2 <= 1;
|
||||
end
|
||||
use_buffer <= 1;
|
||||
end else if (!pop) begin
|
||||
buffer <= data_in;
|
||||
end else begin
|
||||
buffer2 <= data_in;
|
||||
use_buffer2 <= 1;
|
||||
end
|
||||
use_buffer <= 1;
|
||||
end else if (!pop) begin
|
||||
buffer <= data_in;
|
||||
use_buffer <= 1;
|
||||
use_buffer <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = use_buffer ? buffer : data_in;
|
||||
end
|
||||
|
||||
assign data_out = use_buffer ? buffer : data_in;
|
||||
|
||||
endmodule
|
|
@ -83,12 +83,11 @@ module VX_ibuffer #(
|
|||
reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
|
||||
reg deq_valid, deq_valid_n;
|
||||
reg [DATAW-1:0] deq_instr, deq_instr_n;
|
||||
reg deq_is_size1, deq_is_size1_n;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
valid_table_n = valid_table;
|
||||
if (deq_fire && deq_is_size1) begin
|
||||
valid_table_n[ibuf_deq_if.wid] = 0;
|
||||
if (deq_fire) begin
|
||||
valid_table_n[ibuf_deq_if.wid] = (q_size[deq_wid] != SIZEW'(1));
|
||||
end
|
||||
if (enq_fire) begin
|
||||
valid_table_n[ibuf_enq_if.wid] = 1;
|
||||
|
@ -96,32 +95,35 @@ module VX_ibuffer #(
|
|||
end
|
||||
|
||||
always @(*) begin
|
||||
deq_valid_n = 0;
|
||||
deq_wid_n = 'x;
|
||||
deq_instr_n = 'x;
|
||||
deq_is_size1_n = 'x;
|
||||
deq_valid_n = 0;
|
||||
deq_wid_n = 'x;
|
||||
deq_instr_n = 'x;
|
||||
|
||||
schedule_table_n = schedule_table;
|
||||
if (deq_fire && deq_is_size1) begin
|
||||
schedule_table_n[ibuf_deq_if.wid] = 0;
|
||||
end
|
||||
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
if (schedule_table_n[i]) begin
|
||||
deq_valid_n = 1;
|
||||
deq_wid_n = `NW_BITS'(i);
|
||||
deq_instr_n = (deq_fire && (ibuf_deq_if.wid == `NW_BITS'(i))) ? q_data_prev[i] : q_data_out[i];
|
||||
deq_is_size1_n = (~(enq_fire && ibuf_enq_if.wid == `NW_BITS'(i))
|
||||
&& (((deq_fire && ibuf_deq_if.wid == `NW_BITS'(i)) && (SIZEW'(2) == q_size[i]))
|
||||
|| (SIZEW'(1) == q_size[i])));
|
||||
schedule_table_n[i] = 0;
|
||||
break;
|
||||
schedule_table_n = schedule_table;
|
||||
|
||||
if (0 == num_warps) begin
|
||||
deq_valid_n = enq_fire;
|
||||
deq_wid_n = ibuf_enq_if.wid;
|
||||
deq_instr_n = q_data_in;
|
||||
end else if ((1 == num_warps) || freeze) begin
|
||||
deq_valid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) || enq_fire;
|
||||
deq_wid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) ? deq_wid : ibuf_enq_if.wid;
|
||||
deq_instr_n = deq_fire ? ((q_size[deq_wid] != SIZEW'(1)) ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid];
|
||||
end else begin
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
if (schedule_table_n[i]) begin
|
||||
deq_valid_n = 1;
|
||||
deq_wid_n = `NW_BITS'(i);
|
||||
deq_instr_n = q_data_out[i];
|
||||
schedule_table_n[i] = 0;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire warp_added = enq_fire && (0 == q_size[ibuf_enq_if.wid]) && (!deq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);
|
||||
wire warp_removed = deq_fire && (1 == q_size[ibuf_deq_if.wid]) && (!enq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);
|
||||
wire warp_added = enq_fire && (0 == q_size[ibuf_enq_if.wid]);
|
||||
wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == ibuf_deq_if.wid) && (1 == q_size[ibuf_deq_if.wid]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -130,20 +132,18 @@ module VX_ibuffer #(
|
|||
deq_valid <= 0;
|
||||
num_warps <= 0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
schedule_table <= (| schedule_table_n) ? schedule_table_n : valid_table_n;
|
||||
valid_table <= valid_table_n;
|
||||
|
||||
if (enq_fire && (0 == num_warps)) begin
|
||||
deq_valid <= 1;
|
||||
deq_wid <= ibuf_enq_if.wid;
|
||||
deq_instr <= q_data_in;
|
||||
deq_is_size1 <= 1;
|
||||
end else if (!freeze) begin
|
||||
deq_valid <= deq_valid_n;
|
||||
deq_wid <= deq_wid_n;
|
||||
deq_instr <= deq_instr_n;
|
||||
deq_is_size1 <= deq_is_size1_n;
|
||||
end
|
||||
if ((| schedule_table_n)) begin
|
||||
schedule_table <= schedule_table_n;
|
||||
end else begin
|
||||
schedule_table <= valid_table_n;
|
||||
schedule_table[deq_wid_n] <= 0;
|
||||
end
|
||||
|
||||
deq_valid <= deq_valid_n;
|
||||
deq_wid <= deq_wid_n;
|
||||
deq_instr <= deq_instr_n;
|
||||
|
||||
if (warp_added && !warp_removed) begin
|
||||
num_warps <= num_warps + NWARPSW'(1);
|
||||
|
@ -151,14 +151,19 @@ module VX_ibuffer #(
|
|||
num_warps <= num_warps - NWARPSW'(1);
|
||||
end
|
||||
|
||||
`ifdef VERILATOR
|
||||
`ifdef VERILATOR
|
||||
/*if (enq_fire || deq_fire || deq_valid) begin
|
||||
$display("*** %t: cur=%b(%0d), nxt=%b(%0d), enq=%b(%0d), deq=%b(%0d), nw=%0d(%0d,%0d,%0d,%0d), sched=%b, sched_n=%b",
|
||||
$time, deq_valid, deq_wid, deq_valid_n, deq_wid_n, enq_fire, ibuf_enq_if.wid, deq_fire, ibuf_deq_if.wid, num_warps, size_r[0], size_r[1], size_r[2], size_r[3], schedule_table, schedule_table_n);
|
||||
end*/
|
||||
begin // verify 'num_warps'
|
||||
integer nw = 0;
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
nw += 32'(q_size[i] != 0);
|
||||
end
|
||||
assert(nw == 32'(num_warps));
|
||||
assert(~deq_fire || num_warps != 0);
|
||||
assert(nw == 32'(num_warps)) else $display("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw);
|
||||
assert(~deq_valid || (q_size[deq_wid] != 0)) else $display("%t: error: invalid schedule: wid=%0d", $time, deq_wid);
|
||||
assert(~deq_fire || (q_size[deq_wid] != 0)) else $display("%t: error: invalid dequeu: wid=%0d", $time, deq_wid);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
|
|
@ -48,7 +48,8 @@ module VX_instr_demux (
|
|||
);
|
||||
|
||||
VX_gpr_bypass #(
|
||||
.DATAW ((2 * `NUM_THREADS * 32))
|
||||
.DATAW (2 * `NUM_THREADS * 32),
|
||||
.PASSTHRU (1) // ALU has no back-pressure, bypass not needed
|
||||
) alu_bypass (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -231,6 +232,6 @@ module VX_instr_demux (
|
|||
`ifdef EXT_F_ENABLE
|
||||
|| (fpu_req_ready && (execute_if.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
|
||||
|| (gpu_req_ready && (execute_if.ex_type == `EX_GPU));
|
||||
|
||||
endmodule
|
|
@ -5,15 +5,15 @@ module VX_ipdom_stack #(
|
|||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input reg [WIDTH - 1:0] q1,
|
||||
input reg [WIDTH - 1:0] q2,
|
||||
output wire[WIDTH - 1:0] d,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [WIDTH - 1:0] q1,
|
||||
input wire [WIDTH - 1:0] q2,
|
||||
output wire [WIDTH - 1:0] d,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
localparam STACK_SIZE = 2 ** DEPTH;
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ module VX_scoreboard #(
|
|||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_reg_mask[ibuf_deq_if.rd], inuse_reg_mask[ibuf_deq_if.rs1], inuse_reg_mask[ibuf_deq_if.rs2], inuse_reg_mask[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -23,42 +23,46 @@ module VX_writeback #(
|
|||
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
|
||||
VX_writeback_if writeback_tmp_if();
|
||||
|
||||
assign writeback_tmp_if.valid = alu_valid ? alu_commit_if.valid :
|
||||
lsu_valid ? lsu_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
fpu_valid ? fpu_commit_if.valid :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.wid = alu_valid ? alu_commit_if.wid :
|
||||
lsu_valid ? lsu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
mul_valid ? mul_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
0;
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
wire [`NUM_THREADS-1:0] wb_thread_mask;
|
||||
wire [`NR_BITS-1:0] wb_rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
|
||||
assign writeback_tmp_if.thread_mask = alu_valid ? alu_commit_if.thread_mask :
|
||||
lsu_valid ? lsu_commit_if.thread_mask :
|
||||
csr_valid ? csr_commit_if.thread_mask :
|
||||
mul_valid ? mul_commit_if.thread_mask :
|
||||
fpu_valid ? fpu_commit_if.thread_mask :
|
||||
0;
|
||||
assign wb_valid = alu_valid ? alu_commit_if.valid :
|
||||
lsu_valid ? lsu_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
fpu_valid ? fpu_commit_if.valid :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.rd = alu_valid ? alu_commit_if.rd :
|
||||
lsu_valid ? lsu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
0;
|
||||
assign wb_wid = alu_valid ? alu_commit_if.wid :
|
||||
lsu_valid ? lsu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
mul_valid ? mul_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
0;
|
||||
|
||||
assign wb_thread_mask = alu_valid ? alu_commit_if.thread_mask :
|
||||
lsu_valid ? lsu_commit_if.thread_mask :
|
||||
csr_valid ? csr_commit_if.thread_mask :
|
||||
mul_valid ? mul_commit_if.thread_mask :
|
||||
fpu_valid ? fpu_commit_if.thread_mask :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
|
||||
lsu_valid ? lsu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
0;
|
||||
assign wb_rd = alu_valid ? alu_commit_if.rd :
|
||||
lsu_valid ? lsu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
0;
|
||||
|
||||
assign wb_data = alu_valid ? alu_commit_if.data :
|
||||
lsu_valid ? lsu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
0;
|
||||
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
|
@ -69,8 +73,8 @@ module VX_writeback #(
|
|||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.wid, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
|
||||
.in ({wb_valid, wb_wid, wb_thread_mask, wb_rd, wb_data}),
|
||||
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
|
||||
);
|
||||
|
||||
assign alu_commit_if.ready = !stall;
|
||||
|
|
|
@ -12,6 +12,7 @@ double sc_time_stamp() {
|
|||
Simulator::Simulator() {
|
||||
// force random values for uninitialized signals
|
||||
Verilated::randReset(2);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
|
|
@ -38,6 +38,20 @@ set_global_assignment -name VERILOG_MACRO QUARTUS
|
|||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue