mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
This commit is contained in:
commit
d79ff077b7
9 changed files with 116 additions and 146 deletions
|
@ -217,7 +217,7 @@ package VX_gpu_pkg;
|
|||
function logic [ISSUE_WIS_W-1:0] wid_to_wis(
|
||||
input logic [`NW_WIDTH-1:0] wid
|
||||
);
|
||||
wid_to_wis = ISSUE_WIS_W'(wid >> `CLOG2(`ISSUE_WIDTH));
|
||||
wid_to_wis = ISSUE_WIS_W'({1'b0, wid} >> `CLOG2(`ISSUE_WIDTH));
|
||||
endfunction
|
||||
|
||||
function logic [ISSUE_ADDRW-1:0] wis_to_addr(
|
||||
|
|
|
@ -49,12 +49,12 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
cache_perf_t perf_l3cache;
|
||||
mem_perf_t mem_perf;
|
||||
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.smem = 'x;
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.l2cache = 'x;
|
||||
assign mem_perf_if.l3cache = perf_l3cache;
|
||||
assign mem_perf_if.smem = 'x;
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
|
|
|
@ -130,6 +130,12 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.smem = '0;
|
||||
assign mem_perf_if.icache = '0;
|
||||
assign mem_perf_if.dcache = '0;
|
||||
assign mem_perf_if.l2cache = '0;
|
||||
assign mem_perf_if.l3cache = '0;
|
||||
assign mem_perf_if.mem = '0;
|
||||
`endif
|
||||
|
||||
`ifdef SCOPE
|
||||
|
|
|
@ -70,8 +70,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
batch_idx <= '0;
|
||||
end else if (batch_done) begin
|
||||
batch_idx <= batch_idx + BATCH_COUNT_W'(1);
|
||||
end else begin
|
||||
batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done);
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
|
|
|
@ -46,6 +46,8 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg valid_out_r;
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
@ -57,11 +59,11 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire ready_out = operands_if[i].ready;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if[i].data.rs3 == 0);
|
||||
|
||||
VX_operands_if staging_if();
|
||||
wire is_rs3_zero = (scoreboard_if[i].data.rs3 == 0);
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
|
@ -82,7 +84,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (staging_if.valid && staging_if.ready) begin
|
||||
if (valid_out_r && ready_out) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
|
@ -170,31 +172,70 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
gpr_rd_rid <= '0;
|
||||
gpr_rd_wis <= '0;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
valid_out_r <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
data_ready <= data_ready_n;
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= scoreboard_if[i].valid && data_ready;
|
||||
end else if (ready_out) begin
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= {scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd};
|
||||
end
|
||||
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign operands_if[i].valid = valid_out_r;
|
||||
assign {operands_if[i].data.uuid,
|
||||
operands_if[i].data.wis,
|
||||
operands_if[i].data.tmask,
|
||||
operands_if[i].data.PC,
|
||||
operands_if[i].data.wb,
|
||||
operands_if[i].data.ex_type,
|
||||
operands_if[i].data.op_type,
|
||||
operands_if[i].data.op_mod,
|
||||
operands_if[i].data.use_PC,
|
||||
operands_if[i].data.use_imm,
|
||||
operands_if[i].data.imm,
|
||||
operands_if[i].data.rd} = data_out_r;
|
||||
assign operands_if[i].data.rs1_data = rs1_data;
|
||||
assign operands_if[i].data.rs2_data = rs2_data;
|
||||
assign operands_if[i].data.rs3_data = rs3_data;
|
||||
|
||||
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
|
||||
|
||||
// GPR banks
|
||||
|
||||
`ifdef GPR_RESET
|
||||
|
@ -228,74 +269,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
end
|
||||
|
||||
// staging buffer
|
||||
|
||||
`RESET_RELAY (stg_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) stg_buf (
|
||||
.clk (clk),
|
||||
.reset (stg_buf_reset),
|
||||
.valid_in (scoreboard_if[i].valid),
|
||||
.ready_in (scoreboard_if[i].ready),
|
||||
.data_in ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd}),
|
||||
.data_out ({
|
||||
staging_if.data.uuid,
|
||||
staging_if.data.wis,
|
||||
staging_if.data.tmask,
|
||||
staging_if.data.PC,
|
||||
staging_if.data.wb,
|
||||
staging_if.data.ex_type,
|
||||
staging_if.data.op_type,
|
||||
staging_if.data.op_mod,
|
||||
staging_if.data.use_PC,
|
||||
staging_if.data.use_imm,
|
||||
staging_if.data.imm,
|
||||
staging_if.data.rd}),
|
||||
.valid_out (staging_if.valid),
|
||||
.ready_out (staging_if.ready)
|
||||
);
|
||||
|
||||
assign staging_if.data.rs1_data = rs1_data;
|
||||
assign staging_if.data.rs2_data = rs2_data;
|
||||
assign staging_if.data.rs3_data = rs3_data;
|
||||
|
||||
// output buffer
|
||||
|
||||
wire valid_stg, ready_stg;
|
||||
assign valid_stg = staging_if.valid && data_ready;
|
||||
assign staging_if.ready = ready_stg && data_ready;
|
||||
|
||||
`RESET_RELAY (out_buf_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW + (3 * `NUM_THREADS * `XLEN)),
|
||||
.SIZE (2),
|
||||
.OUT_REG (2)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (out_buf_reset),
|
||||
.valid_in (valid_stg),
|
||||
.ready_in (ready_stg),
|
||||
.data_in (staging_if.data),
|
||||
.data_out (operands_if[i].data),
|
||||
.valid_out (operands_if[i].valid),
|
||||
.ready_out (operands_if[i].ready)
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -51,7 +51,6 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||
VX_ibuffer_if staging_if();
|
||||
|
||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||
|
||||
|
@ -84,10 +83,17 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg valid_out_r;
|
||||
wire ready_out;
|
||||
|
||||
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire deps_ready = (& ready_masks);
|
||||
|
||||
wire valid_in = ibuffer_if[i].valid && deps_ready;
|
||||
wire ready_in = ~valid_out_r && deps_ready;
|
||||
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
|
||||
|
||||
assign ready_out = scoreboard_if[i].ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
|
@ -97,40 +103,25 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= ibuffer_if[i].valid && deps_ready;
|
||||
end else if (staging_if.ready) begin
|
||||
if (staging_if.data.wb) begin
|
||||
inuse_regs[staging_if.data.wis][staging_if.data.rd] <= 1;
|
||||
valid_out_r <= valid_in;
|
||||
end else if (ready_out) begin
|
||||
if (scoreboard_if[i].data.wb) begin
|
||||
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
|
||||
`ifdef PERF_ENABLE
|
||||
inuse_units[staging_if.data.wis][staging_if.data.rd] <= staging_if.data.ex_type;
|
||||
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
|
||||
`endif
|
||||
end
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= ibuffer_if[i].data;
|
||||
data_out_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ibuffer_if[i].ready = ~valid_out_r && deps_ready;
|
||||
assign staging_if.valid = valid_out_r;
|
||||
assign staging_if.data = data_out_r;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (0),
|
||||
.OUT_REG (2)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (staging_if.valid),
|
||||
.ready_in (staging_if.ready),
|
||||
.data_in (staging_if.data),
|
||||
.data_out (scoreboard_if[i].data),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.ready_out (scoreboard_if[i].ready)
|
||||
);
|
||||
assign ibuffer_if[i].ready = ready_in;
|
||||
assign scoreboard_if[i].valid = valid_out_r;
|
||||
assign scoreboard_if[i].data = data_out_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
|
|
|
@ -355,11 +355,14 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
wire [NUM_LANES-1:0][INT_WIDTH-1:0] tmp_result_s3;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
fflags_t i2f_regular_status_s3 = i2f_round_has_sticky_s3[i] ? 5'h1 : 5'h0;
|
||||
fflags_t f2i_regular_status_s3 = f2i_round_has_sticky_s3[i] ? 5'h1 : 5'h0;
|
||||
fflags_t i2f_regular_status_s3, f2i_regular_status_s3;
|
||||
fflags_t i2f_status_s3, f2i_status_s3;
|
||||
|
||||
fflags_t i2f_status_s3 = i2f_regular_status_s3;
|
||||
fflags_t f2i_status_s3 = f2i_result_is_special_s3[i] ? f2i_special_status_s3[i] : f2i_regular_status_s3;
|
||||
assign i2f_regular_status_s3 = {4'h0, i2f_round_has_sticky_s3[i]};
|
||||
assign f2i_regular_status_s3 = {4'h0, f2i_round_has_sticky_s3[i]};
|
||||
|
||||
assign i2f_status_s3 = i2f_regular_status_s3;
|
||||
assign f2i_status_s3 = f2i_result_is_special_s3[i] ? f2i_special_status_s3[i] : f2i_regular_status_s3;
|
||||
|
||||
wire [INT_WIDTH-1:0] i2f_result_s3 = fmt_result_s3[i];
|
||||
wire [INT_WIDTH-1:0] f2i_result_s3 = f2i_result_is_special_s3[i] ? f2i_special_result_s3[i] : rounded_int_res_s3[i];
|
||||
|
|
|
@ -201,9 +201,7 @@ module VX_fifo_queue #(
|
|||
rd_ptr_r <= '0;
|
||||
rd_ptr_n_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
||||
end
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (DEPTH > 2) begin
|
||||
|
|
|
@ -9,16 +9,16 @@ all:
|
|||
$(MAKE) -C dotproduct
|
||||
$(MAKE) -C kmeans
|
||||
$(MAKE) -C spmv
|
||||
$(MAKE) -C transpose
|
||||
$(MAKE) -C cutcp
|
||||
$(MAKE) -C vectorhypot
|
||||
$(MAKE) -C stencil
|
||||
$(MAKE) -C mri-q
|
||||
$(MAKE) -C lbm
|
||||
$(MAKE) -C oclprintf
|
||||
$(MAKE) -C blackscholes
|
||||
$(MAKE) -C sgemm2
|
||||
$(MAKE) -C transpose
|
||||
$(MAKE) -C convolution
|
||||
# $(MAKE) -C cutcp
|
||||
# $(MAKE) -C sgemm2
|
||||
# $(MAKE) -C vectorhypot
|
||||
# $(MAKE) -C mri-q run-simx
|
||||
|
||||
run-simx:
|
||||
$(MAKE) -C vecadd run-simx
|
||||
|
@ -37,10 +37,10 @@ run-simx:
|
|||
$(MAKE) -C blackscholes run-simx
|
||||
$(MAKE) -C transpose run-simx
|
||||
$(MAKE) -C convolution run-simx
|
||||
$(MAKE) -C cutcp run-simx
|
||||
$(MAKE) -C vectorhypot run-simx
|
||||
$(MAKE) -C mri-q run-simx
|
||||
# $(MAKE) -C cutcp run-simx
|
||||
# $(MAKE) -C sgemm2 run-simx
|
||||
# $(MAKE) -C vectorhypot run-simx
|
||||
# $(MAKE) -C mri-q run-simx
|
||||
|
||||
run-rtlsim:
|
||||
$(MAKE) -C vecadd run-rtlsim
|
||||
|
@ -98,15 +98,15 @@ clean:
|
|||
$(MAKE) -C kmeans clean
|
||||
$(MAKE) -C spmv clean
|
||||
$(MAKE) -C transpose clean
|
||||
$(MAKE) -C cutcp clean
|
||||
$(MAKE) -C vectorhypot clean
|
||||
$(MAKE) -C stencil clean
|
||||
$(MAKE) -C mri-q clean
|
||||
$(MAKE) -C lbm clean
|
||||
$(MAKE) -C oclprintf clean
|
||||
$(MAKE) -C blackscholes clean
|
||||
$(MAKE) -C sgemm2 clean
|
||||
$(MAKE) -C convolution clean
|
||||
# $(MAKE) -C cutcp clean
|
||||
# $(MAKE) -C sgemm2 clean
|
||||
# $(MAKE) -C vectorhypot clean
|
||||
# $(MAKE) -C mri-q clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C vecadd clean-all
|
||||
|
@ -114,19 +114,18 @@ clean-all:
|
|||
$(MAKE) -C psort clean-all
|
||||
$(MAKE) -C saxpy clean-all
|
||||
$(MAKE) -C sfilter clean-all
|
||||
$(MAKE) -C sfilter clean-all
|
||||
$(MAKE) -C nearn clean-all
|
||||
$(MAKE) -C guassian clean-all
|
||||
$(MAKE) -C dotproduct clean-all
|
||||
$(MAKE) -C kmeans clean-all
|
||||
$(MAKE) -C spmv clean-all
|
||||
$(MAKE) -C transpose clean-all
|
||||
$(MAKE) -C cutcp clean-all
|
||||
$(MAKE) -C vectorhypot clean-all
|
||||
$(MAKE) -C stencil clean-all
|
||||
$(MAKE) -C mri-q clean-all
|
||||
$(MAKE) -C lbm clean-all
|
||||
$(MAKE) -C oclprintf clean-all
|
||||
$(MAKE) -C blackscholes clean-all
|
||||
$(MAKE) -C sgemm2 clean-all
|
||||
$(MAKE) -C convolution clean-all
|
||||
# $(MAKE) -C cutcp clean-all
|
||||
# $(MAKE) -C sgemm2 clean-all
|
||||
# $(MAKE) -C vectorhypot clean-all
|
||||
# $(MAKE) -C mri-q clean-all
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue