mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
This commit is contained in:
commit
e06e6646a9
21 changed files with 169 additions and 193 deletions
|
@ -54,9 +54,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
### Install prebuilt toolchain
|
||||
By default, the toolchain will install to /opt folder.
|
||||
You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||
|
||||
By default, the toolchain will install to /opt folder which requires sudo access.
|
||||
You can install the toolchain to a different location of your choice by setting TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||
$ export TOOLDIR=/opt
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
### Build Vortex sources
|
||||
|
|
|
@ -136,6 +136,18 @@
|
|||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
|
|
|
@ -298,18 +298,6 @@
|
|||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
|
|
12
hw/rtl/cache/VX_cache_bypass.sv
vendored
12
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -130,20 +130,20 @@ module VX_cache_bypass #(
|
|||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
||||
|
||||
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
||||
wire core_req_nc_ready = ~mem_req_valid_in && mem_req_ready_out;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
) req_arb (
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (core_req_in_fire),
|
||||
.reset (reset),
|
||||
.requests (core_req_valid_in_nc),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid)
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_unlock (core_req_nc_ready)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
||||
|
@ -164,7 +164,7 @@ module VX_cache_bypass #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_req_ready_out[i];
|
||||
end
|
||||
|
||||
|
|
|
@ -47,8 +47,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg valid_out_r;
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
@ -60,7 +58,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire ready_out = operands_if[i].ready;
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
|
@ -85,7 +83,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (valid_out_r && ready_out) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
|
@ -173,37 +171,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
valid_out_r <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= scoreboard_if[i].valid && data_ready;
|
||||
end else if (ready_out) begin
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
data_ready <= data_ready_n;
|
||||
end
|
||||
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= {scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd};
|
||||
end
|
||||
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
|
@ -216,10 +192,35 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
end
|
||||
|
||||
assign operands_if[i].valid = valid_out_r;
|
||||
assign {operands_if[i].data.uuid,
|
||||
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
|
||||
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
|
||||
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) staging_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd
|
||||
}),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (operands_if[i].valid),
|
||||
.data_out ({
|
||||
operands_if[i].data.uuid,
|
||||
operands_if[i].data.wis,
|
||||
operands_if[i].data.tmask,
|
||||
operands_if[i].data.PC,
|
||||
|
@ -230,13 +231,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
operands_if[i].data.use_PC,
|
||||
operands_if[i].data.use_imm,
|
||||
operands_if[i].data.imm,
|
||||
operands_if[i].data.rd} = data_out_r;
|
||||
operands_if[i].data.rd
|
||||
}),
|
||||
.ready_out (operands_if[i].ready)
|
||||
);
|
||||
|
||||
assign operands_if[i].data.rs1_data = rs1_data;
|
||||
assign operands_if[i].data.rs2_data = rs2_data;
|
||||
assign operands_if[i].data.rs3_data = rs3_data;
|
||||
|
||||
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
|
||||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
|
|
|
@ -111,7 +111,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
case (scoreboard_if[i].data.op_type)
|
||||
case (ibuffer_if[i].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||
|
@ -152,51 +152,47 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||
`endif
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg valid_out_r;
|
||||
wire ready_out;
|
||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire operands_ready = ~(| operands_busy);
|
||||
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
assign stg_valid_in = ibuffer_if[i].valid && operands_ready;
|
||||
assign ibuffer_if[i].ready = stg_ready_in && operands_ready;
|
||||
|
||||
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire deps_ready = (& ready_masks);
|
||||
|
||||
wire valid_in = ibuffer_if[i].valid && deps_ready;
|
||||
wire ready_in = ~valid_out_r && deps_ready;
|
||||
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
|
||||
|
||||
assign ready_out = scoreboard_if[i].ready;
|
||||
VX_stream_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) staging_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.data_out (scoreboard_if[i].data),
|
||||
.ready_out (scoreboard_if[i].ready)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= valid_in;
|
||||
end else if (ready_out) begin
|
||||
if (scoreboard_if[i].data.wb) begin
|
||||
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
|
||||
`ifdef PERF_ENABLE
|
||||
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
|
||||
if (scoreboard_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
valid_out_r <= 0;
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= data_in;
|
||||
`ifdef PERF_ENABLE
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
|
||||
if (ibuffer_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
assign ibuffer_if[i].ready = ready_in;
|
||||
assign scoreboard_if[i].valid = valid_out_r;
|
||||
assign scoreboard_if[i].data = data_out_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
|
@ -208,7 +204,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
||||
|
@ -220,7 +216,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
|
|
|
@ -21,15 +21,12 @@ module VX_cyclic_arbiter #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (unlock)
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
|
@ -51,7 +48,7 @@ module VX_cyclic_arbiter #(
|
|||
end else begin
|
||||
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||
grant_index_r <= '0;
|
||||
end else begin
|
||||
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
|
||||
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
||||
end
|
||||
end
|
||||
|
|
|
@ -21,17 +21,17 @@ module VX_fair_arbiter #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
|
@ -48,18 +48,14 @@ module VX_fair_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
buffer <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
buffer <= buffer_n;
|
||||
end
|
||||
end
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
) priority_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests_qual),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
|
|
|
@ -21,22 +21,23 @@ module VX_generic_arbiter #(
|
|||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (TYPE == "P") begin
|
||||
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
) priority_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
|
@ -50,12 +51,12 @@ module VX_generic_arbiter #(
|
|||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
) rr_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.reset (reset),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "F") begin
|
||||
|
@ -66,11 +67,11 @@ module VX_generic_arbiter #(
|
|||
) fair_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "M") begin
|
||||
|
@ -81,11 +82,11 @@ module VX_generic_arbiter #(
|
|||
) matrix_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "C") begin
|
||||
|
@ -96,11 +97,11 @@ module VX_generic_arbiter #(
|
|||
) cyclic_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
|
|
@ -20,18 +20,18 @@ module VX_matrix_arbiter #(
|
|||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
|
@ -71,18 +71,18 @@ module VX_matrix_arbiter #(
|
|||
end
|
||||
|
||||
if (LOCK_ENABLE == 0) begin
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
assign grant_onehot = grant_unqual;
|
||||
end else begin
|
||||
reg [NUM_REQS-1:0] grant_unqual_prev;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_unqual_prev <= '0;
|
||||
end else if (unlock) begin
|
||||
end else if (grant_unlock) begin
|
||||
grant_unqual_prev <= grant_unqual;
|
||||
end
|
||||
end
|
||||
assign grant_onehot = unlock ? grant_unqual : grant_unqual_prev;
|
||||
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_mem_rsp_sel #(
|
|||
parameter TAG_SEL_BITS = 0,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input response
|
||||
|
@ -46,18 +46,20 @@ input wire clk,
|
|||
|
||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||
wire grant_valid;
|
||||
wire rsp_fire;
|
||||
wire grant_ready;
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS)
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE ("P")
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (rsp_fire),
|
||||
.requests (rsp_valid_in),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(grant_ready)
|
||||
);
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_valid_sel;
|
||||
|
@ -78,7 +80,7 @@ input wire clk,
|
|||
end
|
||||
end
|
||||
|
||||
assign rsp_fire = grant_valid && rsp_ready_unqual;
|
||||
assign grant_ready = rsp_ready_unqual;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
||||
|
|
|
@ -16,22 +16,13 @@
|
|||
`TRACING_OFF
|
||||
module VX_priority_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
);
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
assign grant_index = '0;
|
||||
|
|
|
@ -21,18 +21,18 @@ module VX_rr_arbiter #(
|
|||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
|
@ -55,7 +55,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -85,7 +85,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -121,7 +121,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -165,7 +165,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -219,7 +219,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -285,7 +285,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -365,7 +365,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
@ -399,7 +399,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pointer_reg <= {NUM_REQS{1'b1}};
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
if (|req_masked) begin
|
||||
pointer_reg <= mask_higher_pri_regs;
|
||||
end else if (|requests) begin
|
||||
|
@ -443,7 +443,7 @@ module VX_rr_arbiter #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -19,7 +19,6 @@ module VX_stream_arb #(
|
|||
parameter NUM_OUTPUTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter LOCK_ENABLE = 1,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter OUT_REG = 0 ,
|
||||
parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS,
|
||||
|
@ -57,7 +56,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) arb_slice (
|
||||
|
@ -102,7 +100,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_slice_arb (
|
||||
|
@ -129,7 +126,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_join_arb (
|
||||
|
@ -158,25 +154,25 @@ module VX_stream_arb #(
|
|||
wire arb_valid;
|
||||
wire [NUM_REQS_W-1:0] arb_index;
|
||||
wire [NUM_REQS-1:0] arb_onehot;
|
||||
wire arb_unlock;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in),
|
||||
.unlock (arb_unlock),
|
||||
.grant_valid (arb_valid),
|
||||
.grant_index (arb_index),
|
||||
.grant_onehot (arb_onehot)
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
);
|
||||
|
||||
assign valid_in_r = arb_valid;
|
||||
assign data_in_r = data_in[arb_index];
|
||||
assign arb_unlock = | (valid_in_r & ready_in_r);
|
||||
assign arb_ready = ready_in_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = ready_in_r & arb_onehot[i];
|
||||
|
@ -217,7 +213,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) arb_slice (
|
||||
|
@ -252,7 +247,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (NUM_BATCHES),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_fork_arb (
|
||||
|
@ -280,7 +274,6 @@ module VX_stream_arb #(
|
|||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_slice_arb (
|
||||
|
@ -305,24 +298,24 @@ module VX_stream_arb #(
|
|||
wire [NUM_OUTPUTS-1:0] arb_requests;
|
||||
wire arb_valid;
|
||||
wire [NUM_OUTPUTS-1:0] arb_onehot;
|
||||
wire arb_unlock;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_OUTPUTS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (arb_requests),
|
||||
.unlock (arb_unlock),
|
||||
.grant_valid (arb_valid),
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot (arb_onehot)
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
);
|
||||
|
||||
assign arb_requests = ready_in_r;
|
||||
assign arb_unlock = | (valid_in & ready_in);
|
||||
assign arb_ready = valid_in[0];
|
||||
assign ready_in = arb_valid;
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
|
|
@ -21,8 +21,7 @@ module VX_stream_xbar #(
|
|||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||
parameter ARBITER = "P",
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter OUT_REG = 0,
|
||||
parameter OUT_REG = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
|
||||
) (
|
||||
|
@ -66,7 +65,6 @@ module VX_stream_xbar #(
|
|||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) xbar_arb (
|
||||
|
@ -95,7 +93,6 @@ module VX_stream_xbar #(
|
|||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) xbar_arb (
|
||||
|
|
|
@ -62,10 +62,10 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||
!L2_ENABLED,
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // L
|
||||
log2ceil(L2_NUM_WAYS), // W
|
||||
0, // A
|
||||
log2ceil(L2_CACHE_SIZE),// C
|
||||
log2ceil(MEM_BLOCK_SIZE),// L
|
||||
log2ceil(L1_LINE_SIZE), // W
|
||||
log2ceil(L2_NUM_WAYS), // A
|
||||
log2ceil(L2_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
|
|
|
@ -210,7 +210,7 @@ void Core::schedule() {
|
|||
void Core::fetch() {
|
||||
perf_stats_.ifetch_latency += pending_ifetches_;
|
||||
|
||||
// handle icache reponse
|
||||
// handle icache response
|
||||
auto& icache_rsp_port = icache_rsp_ports.at(0);
|
||||
if (!icache_rsp_port.empty()){
|
||||
auto& mem_rsp = icache_rsp_port.front();
|
||||
|
|
|
@ -339,7 +339,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case 1: {
|
||||
// RV64I: SLLI
|
||||
// RV32I: SLLI
|
||||
rddata[t].i = rsdata[t][0].i << immsrc;
|
||||
break;
|
||||
}
|
||||
|
@ -360,11 +360,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case 5: {
|
||||
if (func7) {
|
||||
// RV64I: SRAI
|
||||
// RV32I: SRAI
|
||||
Word result = rsdata[t][0].i >> immsrc;
|
||||
rddata[t].i = result;
|
||||
} else {
|
||||
// RV64I: SRLI
|
||||
// RV32I: SRLI
|
||||
Word result = rsdata[t][0].u >> immsrc;
|
||||
rddata[t].i = result;
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ static void show_usage() {
|
|||
uint32_t num_threads = NUM_THREADS;
|
||||
uint32_t num_warps = NUM_WARPS;
|
||||
uint32_t num_cores = NUM_CORES;
|
||||
bool showStats = false;;
|
||||
bool showStats = false;
|
||||
bool riscv_test = false;
|
||||
const char* program = nullptr;
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
!L3_ENABLED,
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // L
|
||||
log2ceil(L3_NUM_WAYS), // W
|
||||
0, // A
|
||||
log2ceil(L2_LINE_SIZE), // W
|
||||
log2ceil(L3_NUM_WAYS), // A
|
||||
log2ceil(L3_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
|
@ -58,7 +58,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||
}
|
||||
|
||||
// set up memory perf recording
|
||||
// set up memory profiling
|
||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
|
|
|
@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
1, // number of inputs
|
||||
true, // write-through
|
||||
false, // write-through
|
||||
false, // write response
|
||||
(uint8_t)arch.num_warps(), // mshr
|
||||
2, // pipeline latency
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue